From d32fe06eb92c59318fee94507346d93553c53bcc Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:02:38 -0500 Subject: [PATCH 01/27] Update helper_output_dataframe.R Linefeed the parameter "output_df <- function" to ensure that the line of code does not exceed 80 characters. --- R/helper_output_dataframe.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/helper_output_dataframe.R b/R/helper_output_dataframe.R index 26518a56..8f6cdf40 100644 --- a/R/helper_output_dataframe.R +++ b/R/helper_output_dataframe.R @@ -1,6 +1,15 @@ # Function to output the data frame -output_df <- function(est_eff, beta_threshhold, unstd_beta, bias = NULL, sustain = NULL, recase, obs_r, critical_r, r_con, itcv, non_linear) { +output_df <- function(est_eff, + beta_threshhold, + unstd_beta, + bias = NULL, + sustain = NULL, + recase, obs_r, + critical_r, + r_con, + itcv, + non_linear) { if (abs(est_eff) > abs(beta_threshhold)) { df <- dplyr::tibble( action = "to_invalidate", From 3cbab8b513f3a45e95ca1b63fb1eb9cc6f3f5351 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:30:03 -0500 Subject: [PATCH 02/27] Update helper_output_print.R Manual modification: no more than 80 characters per line of code --- R/helper_output_print.R | 130 ++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 38 deletions(-) diff --git a/R/helper_output_print.R b/R/helper_output_print.R index 09b32dcb..e2f944c8 100644 --- a/R/helper_output_print.R +++ b/R/helper_output_print.R @@ -1,24 +1,47 @@ # Function to output printed text -output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) { - if (index == "RIR"){ - cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) - if (abs(eff_diff) > abs(beta_threshhold)) { - cat("To invalidate an inference, ", round(bias, 
3), "% of the estimate would have to be due to bias. ") - cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") - cat("\n") - cat("To invalidate an inference, ", round(recase, 3), " observations would have to be replaced with cases") - cat("\n") - cat("for which the effect is ", nu, " (RIR = ", round(recase, 3), ").\n", sep = "") - cat("\n") +output_print <- function(eff_diff, + beta_threshhold, + bias = NULL, + sustain = NULL, + nu, + recase, + obs_r, + critical_r, + r_con, + itcv, + alpha, + index) { + if (index == "RIR") { + cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) + if (abs(eff_diff) > abs(beta_threshhold)) { + cat("To invalidate an inference, ", round(bias, 3), + "% of the estimate would have to be due to bias. ") + cat("\n") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") + cat("\n") + cat("To invalidate an inference, ", round(recase, 3), + " observations would have to be replaced with cases") + cat("\n") + cat("for which the effect is ", nu, + " (RIR = ", round(recase, 3), ").\n", sep = "") + cat("\n") } else if (abs(eff_diff) < abs(beta_threshhold)) { - cat("To sustain an inference, ", round(sustain, 3), "% of the estimate would have to be due to bias. ") + cat("To sustain an inference, ", round(sustain, 3), + "% of the estimate would have to be due to bias. 
") cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") cat("\n") - cat("To sustain an inference, ", round(recase, 3), " of the cases with ", nu, " effect would have to be replaced with cases at the threshold of inference"," (RIR = ", round(recase, 3), ").\n", sep = "") + cat("To sustain an inference, ", round(recase, 3), + " of the cases with ", nu, + " effect would have to be replaced with cases at the + threshold of inference", + " (RIR = ", round(recase, 3), ").\n", sep = "") } else if (eff_diff == beta_threshhold) { warning("The coefficient is exactly equal to the threshold.\n") @@ -26,65 +49,94 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("See Frank et al. (2013) for a description of the method.") cat("\n") cat("\n") - cat(crayon::underline("Citation:"), "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. (2013).") + cat(crayon::underline("Citation:"), + "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. 
(2013).") cat("\n") cat("What would it take to change an inference?") cat("\n") - cat("Using Rubin's causal model to interpret the robustness of causal inferences.") + cat("Using Rubin's causal model to interpret the + robustness of causal inferences.") cat("\n") - cat(crayon::italic("Education, Evaluation and Policy Analysis, 35"), "437-460.") + cat(crayon::italic("Education, Evaluation and + Policy Analysis, 35"), "437-460.") cat("\n") } if (index == "IT") { cat(crayon::bold("Impact Threshold for a Confounding Variable:\n")) if (abs(obs_r) > abs(critical_r) & obs_r > 0) { - cat("The minimum impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The minimum impact of an omitted variable to invalidate an + inference for a null hypothesis of 0 effect is based on + a correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ") + " with the predictor of interest + (conditioning on observed covariates) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", - sep = "") + cat(round(critical_r, 3), " for statistical + significance (alpha = ", alpha, ").\n",sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to invalidate an inference for a null hypothesis of 0 effect. 
+ \n", sep = "") } else if (abs(obs_r) > abs(critical_r) & obs_r < 0) { - cat("The minimum (in absolute value) impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The minimum (in absolute value) impact of an omitted + variable to invalidate an inference for a null hypothesis of 0 + effect is based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ") + " with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", + cat(round(critical_r, 3), " for statistical significance + (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to invalidate an inference for a null hypothesis + of 0 effect.\n", sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r > 0) { - cat("The maximum impact (in absolute value) of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The maximum impact (in absolute value) of an omitted variable + to sustain an inference for a null hypothesis of 0 effect is + based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ", round(beta_threshhold, 3)) + " 
with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ", + round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r < 0) { - cat("The maximum impact of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The maximum impact of an omitted variable to sustain an + inference for a null hypothesis of 0 effect is based on a + correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ", round(beta_threshhold, 3)) + " with the predictor of interest (conditioning on observed + covariates) based on a threshold of ", round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (obs_r == critical_r) { warning("The correlation is exactly equal 
to the threshold.\n") } @@ -95,7 +147,9 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("\n") cat("Frank, K. (2000). Impact of a confounding variable on the") cat("\n") - cat("inference of a regression coefficient.", crayon::italic("Sociological Methods and Research, 29"), "(2), 147-194") + cat("inference of a regression coefficient.", + crayon::italic("Sociological Methods and Research, 29"), + "(2), 147-194") cat("\n") } From 4fc720814d4c3bbe3c317088310cc112f20ca023 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:46:02 -0500 Subject: [PATCH 03/27] Update helper_output_table.R Manual modification: no more than 80 characters per line of code --- R/helper_output_table.R | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/R/helper_output_table.R b/R/helper_output_table.R index c3211f5a..254a7521 100644 --- a/R/helper_output_table.R +++ b/R/helper_output_table.R @@ -5,18 +5,34 @@ output_table <- function(model_object, tested_variable) { cat("Dependent variable is", p, "\n") model_output <- broom::tidy(model_object) # tidying output var_row <- model_output$term == tested_variable - model_output$itcv[var_row] <- abs(konfound(model_object, !!tested_variable, to_return = "raw_output")$itcv) + model_output$itcv[var_row] <- abs(konfound( + model_object, + !!tested_variable, to_return = "raw_output" + )$itcv) - covariate_names <- model_output$term[!(model_output$term %in% c("(Intercept)", tested_variable))] + covariate_names <- model_output$term[!( + model_output$term %in% c("(Intercept)", tested_variable) +)] - for (i in seq(covariate_names)) { - cov_row <- model_output$term == covariate_names[i] - d <- model_object$model - cor_df <- as.data.frame(stats::cor(d)) - model_output$impact[cov_row] <- round(abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), 3) # r_zy * r_zx - } - model_output <- 
purrr::modify_if(model_output, is.numeric, round, digits = 3) +for (i in seq(covariate_names)) { + cov_row <- model_output$term == covariate_names[i] + d <- model_object$model + cor_df <- as.data.frame(stats::cor(d)) + model_output$impact[cov_row] <- round( + abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), + 3 + ) # r_zy * r_zx +} + + + model_output <- purrr::modify_if( + model_output, + is.numeric, + round, + digits = 3 +) + return(model_output) } From eef4c4964afdaa42574d653aa65d435c5c425e71 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:18:24 -0500 Subject: [PATCH 04/27] Update helper_plot_correlation.R Manual modification: no more than 80 characters per line of code --- R/helper_plot_correlation.R | 127 +++++++++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 22 deletions(-) diff --git a/R/helper_plot_correlation.R b/R/helper_plot_correlation.R index e622bc66..5a329b60 100644 --- a/R/helper_plot_correlation.R +++ b/R/helper_plot_correlation.R @@ -13,28 +13,111 @@ plot_correlation <- function(r_con, obs_r, critical_r) { p <- ggplot2::ggplot(d, ggplot2::aes_string(x = "x", y = "y")) + - # main arrows - ggplot2::geom_segment(ggplot2::aes(y = .1), xend = 0, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight up - ggplot2::geom_segment(ggplot2::aes(x = .1), xend = 1, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # hypotenuse - ggplot2::geom_segment(ggplot2::aes(x = .15, y = 1), xend = .9, yend = 1, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight across - - # lagel annotations - ggplot2::annotate("text", x = 0, y = 0, label = paste0("Confounding\nVariable"), fontface = 3) + - ggplot2::annotate("text", x = 0, y = 1, label = paste0("Predictor of Interest"), fontface = 3) + - ggplot2::annotate("text", x = 1, y = 1, label = paste0("Outcome"), fontface = 3) + - - # CV arrows - # 
ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - # ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - ggplot2::geom_curve(ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), curvature = -.35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - ggplot2::geom_curve(ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), curvature = .35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - - ggplot2::geom_segment(ggplot2::aes(x = .37, y = .81), xend = .465, yend = .925, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - - # corr. annotations - ggplot2::annotate("text", x = -.125, y = .5, label = paste0("Rx.cv | Z =\n ", r_con), fontface = 1) + - ggplot2::annotate("text", x = .575, y = .35, label = paste0("Ry.cv | Z =\n", r_con), fontface = 1) + - ggplot2::annotate("text", x = .25, y = .525, label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), fontface = 1) + +# main arrows +ggplot2::geom_segment( + ggplot2::aes(y = .1), + xend = 0, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight up +ggplot2::geom_segment( + ggplot2::aes(x = .1), + xend = 1, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # hypotenuse +ggplot2::geom_segment( + ggplot2::aes(x = .15, y = 1), + xend = .9, + yend = 1, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight across + + +# label annotations +ggplot2::annotate( + "text", + x = 0, + y = 0, + label = paste0("Confounding\nVariable"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 0, + y = 1, + label = paste0("Predictor of Interest"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 1, + y = 1, + label = paste0("Outcome"), + fontface = 3 +) + + + +# CV arrows +# 
ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, +#yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + +# straight across +# ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, +#yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + + # straight across +ggplot2::geom_curve( + ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), + curvature = -.35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_curve( + ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), + curvature = .35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_segment( + ggplot2::aes(x = .37, y = .81), + xend = .465, + yend = .925, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#1F78B4" +) + # straight across + + +# corr. annotations +ggplot2::annotate( + "text", + x = -.125, + y = .5, + label = paste0("Rx.cv | Z =\n ", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .575, + y = .35, + label = paste0("Ry.cv | Z =\n", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .25, + y = .525, + label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), + fontface = 1 +) + + # plot modifications ggplot2::xlim(-.15, 1.1) + From 64e1584208590285180a2edeed39e0021eac9d30 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:34:00 -0500 Subject: [PATCH 05/27] Update helper_plot_threshold.R Manual modification: no more than 80 characters per line of code --- R/helper_plot_threshold.R | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/R/helper_plot_threshold.R b/R/helper_plot_threshold.R index aa388c5f..63b3de35 100644 --- a/R/helper_plot_threshold.R +++ b/R/helper_plot_threshold.R @@ -27,7 +27,8 @@ plot_threshold <- function(beta_threshold, est_eff) { } else if (est_eff < beta_threshold) { # beta is below threshold dd <- dplyr::tibble(est_eff = 
est_eff, beta_threshold = beta_threshold) - dd <- dplyr::mutate(dd, `Above Estimated Effect, Below Threshold` = abs(est_eff - beta_threshold)) + dd <- dplyr::mutate( dd,`Above Estimated Effect, Below Threshold` + = abs(est_eff - beta_threshold)) dd <- dplyr::mutate(dd, `Below Threshold` = est_eff) dd <- dplyr::select(dd, -beta_threshold) @@ -43,22 +44,30 @@ plot_threshold <- function(beta_threshold, est_eff) { y_thresh_text <- y_thresh + sqrt(.005 * abs(y_thresh)) effect_text <- est_eff + sqrt(.025 * abs(est_eff)) # y-value of text - cols <- c("lightgray", "#1F78B4") # dark blue and green (green not used right now) + cols <- c("lightgray", "#1F78B4") + # dark blue and green (green not used right now) } p <- ggplot2::ggplot(dd, ggplot2::aes(x = inference, y = val, fill = key)) + ggplot2::geom_col(position = "stack") + ggplot2::geom_hline(yintercept = est_eff, color = "black") + - ggplot2::annotate("text", x = 1, y = effect_text, label = "Estimated Effect") + + ggplot2::annotate("text", x = 1, y = effect_text, + label = "Estimated Effect") + ggplot2::geom_hline(yintercept = y_thresh, color = "red") + - ggplot2::annotate("text", x = 1, y = y_thresh_text, label = "Threshold") + - # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + this is discussed here: https://github.com/jrosen48/konfound/issues/5 + ggplot2::annotate("text", x = 1, y = y_thresh_text, + label = "Threshold") + + # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + + #this is discussed here: https://github.com/jrosen48/konfound/issues/5 ggplot2::scale_fill_manual("", values = cols) + ggplot2::theme_bw() + - ggplot2::theme(axis.text.x = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank()) + + ggplot2::theme( + axis.text.x = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank() +) + + ggplot2::xlab(NULL) + ggplot2::ylab("Effect (abs. 
value)") + ggplot2::theme(legend.position = "top") From b0e5adc594e19481752075ea741eb8c196427a5f Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:36:31 -0500 Subject: [PATCH 06/27] Update konfound-glm-dichotomous.R Manual modification: no more than 80 characters per line of code --- R/konfound-glm-dichotomous.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/konfound-glm-dichotomous.R b/R/konfound-glm-dichotomous.R index 4aa1287b..6785e23e 100644 --- a/R/konfound-glm-dichotomous.R +++ b/R/konfound-glm-dichotomous.R @@ -1,7 +1,13 @@ # konfound-glm -konfound_glm_dichotomous <- function(model_object, tested_variable_string, test_all, alpha, tails, - to_return, n_treat, switch_trm, replace) { +konfound_glm_dichotomous <- function(model_object, + tested_variable_string, + test_all, + alpha, tails, + to_return, + n_treat, + switch_trm, + replace) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) From 68d4824b3a66c0d56212e176771899f8995c765f Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:51:06 -0500 Subject: [PATCH 07/27] Update konfound-glm.R Manual modification: no more than 80 characters per line of code --- R/konfound-glm.R | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/R/konfound-glm.R b/R/konfound-glm.R index aef0941c..c65cc32e 100644 --- a/R/konfound-glm.R +++ b/R/konfound-glm.R @@ -1,6 +1,12 @@ # konfound-glm -konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, tails, index = "RIR", to_return) { +konfound_glm <- function(model_object, + tested_variable_string, + test_all, + alpha, + tails, + index = "RIR", + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -8,11 +14,18 @@ konfound_glm <- function(model_object, 
tested_variable_string, test_all, alpha, coef_df <- tidy_output[tidy_output$term == tested_variable_string, ] } else { coef_df <- tidy_output[-1, ] - coef_df$est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + coef_df$est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins( + model_object))$AME) == tested_variable_string]) } # to remove intercept est_eff <- coef_df$estimate - est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins(model_object))$AME) == tested_variable_string]) std_err <- coef_df$std.error n_obs <- glance_output$nobs n_covariates <- glance_output$df.null - glance_output$df.residual @@ -33,11 +46,14 @@ konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") - stop("Multiple variables cannot presently be tested for models fit using glm(); this will be added in the future.") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") + stop("Multiple variables cannot presently be tested + for models fit using glm(); this will be added in the future.") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } From 
5fd0bca206229d3097ff74b14a48276bdc588a5e Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:53:19 -0500 Subject: [PATCH 08/27] Update konfound-lm.R Manual modification: no more than 80 characters per line of code --- R/konfound-lm.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/R/konfound-lm.R b/R/konfound-lm.R index ff2923b3..f0d9bd1b 100644 --- a/R/konfound-lm.R +++ b/R/konfound-lm.R @@ -1,6 +1,11 @@ # konfound-lm -konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { +konfound_lm <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -31,10 +36,12 @@ konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, t ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } From e2a806b8b23f50ef5203e95a442290c19360bea7 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:55:15 -0500 Subject: [PATCH 09/27] Update konfound-lmer.R Manual modification: no more than 80 characters per line of code --- R/konfound-lmer.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git 
a/R/konfound-lmer.R b/R/konfound-lmer.R index afb773d1..63906eac 100644 --- a/R/konfound-lmer.R +++ b/R/konfound-lmer.R @@ -3,12 +3,18 @@ get_kr_df <- function(model_object) { L <- diag(rep(1, length(lme4::fixef(model_object)))) L <- as.data.frame(L) - out <- suppressWarnings(purrr::map_dbl(L, pbkrtest::get_Lb_ddf, object = model_object)) + out <- suppressWarnings(purrr::map_dbl( + L, pbkrtest::get_Lb_ddf, object = model_object)) names(out) <- names(lme4::fixef(model_object)) out } -konfound_lmer <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { +konfound_lmer <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom.mixed::tidy(model_object) # tidying output if (test_all == FALSE) { From 1c0d2b479aeb5acc81b36e6193330be6c60348a4 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:22:18 -0500 Subject: [PATCH 10/27] Update konfound.R some comment need to lines break --- R/konfound.R | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/R/konfound.R b/R/konfound.R index d9b1c3c3..e9ea1a84 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -52,13 +52,18 @@ konfound <- function(model_object, # Stop messages if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { - stop("konfound() is currently implemented for models estimated with lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") + stop("konfound() is currently implemented for models estimated with + lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") } - if ("table" %in% to_return & test_all == TRUE) stop("cannot return a table when test_all is set to TRUE") - + if ("table" %in% to_return & test_all == TRUE){ + stop("cannot return a table when test_all is set to TRUE") + } # Dealing with non-standard evaluation - tested_variable_enquo <- 
rlang::enquo(tested_variable) # dealing with non-standard evaluation (so unquoted names for tested_variable can be used) + + # dealing with non-standard evaluation + #(so unquoted names for tested_variable can be used) + tested_variable_enquo <- rlang::enquo(tested_variable) tested_variable_string <- rlang::quo_name(tested_variable_enquo) # Dispatching based on class @@ -81,9 +86,14 @@ konfound <- function(model_object, } if (inherits(model_object, "glm") & two_by_two == FALSE) { - message("Note that for a non-linear model, impact threshold should not be interpreted.") - message("Note that this is only an approximation. For exact results in terms of the number of cases that must be switched from treatment success to treatment failure to invalidate the inference see: https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") - message("If a dichotomous independent variable is used, consider using the 2X2 table approach enabled with the argument two_by_two = TRUE") + message("Note that for a non-linear model, + impact threshold should not be interpreted.") + message("Note that this is only an approximation. 
For exact results + in terms of the number of cases that must be switched from treatment + success to treatment failure to invalidate the inference see: + https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") + message("If a dichotomous independent variable is used, consider using + the 2X2 table approach enabled with the argument two_by_two = TRUE") output <- konfound_glm( model_object = model_object, tested_variable_string = tested_variable_string, @@ -98,9 +108,13 @@ konfound <- function(model_object, if (inherits(model_object, "glm") & two_by_two == TRUE) { - if(is.null(n_treat)) stop("Please provide a value for n_treat to use this functionality with a dichotomous predictor") - if (test_all == TRUE) stop("test_all = TRUE is not supported when two_by_two is specified") - + if(is.null(n_treat)){ + stop("Please provide a value for + n_treat to use this functionality with a dichotomous predictor") + } + if (test_all == TRUE) { + stop("test_all = TRUE is not supported when two_by_two is specified") + } output <- konfound_glm_dichotomous( model_object = model_object, tested_variable_string = tested_variable_string, @@ -128,7 +142,14 @@ konfound <- function(model_object, to_return = to_return ) - message("Note that the Kenward-Roger approximation is used to estimate degrees of freedom for the predictor(s) of interest. We are presently working to add other methods for calculating the degrees of freedom for the predictor(s) of interest. If you wish to use other methods now, consider those detailed here: https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. 
You can then enter degrees of freedom obtained from another method along with the coefficient, number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") + message("Note that the Kenward-Roger approximation is used to + estimate degrees of freedom for the predictor(s) of interest. + We are presently working to add other methods for calculating + the degrees of freedom for the predictor(s) of interest. + If you wish to use other methods now, consider those detailed here: + https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. + You can then enter degrees of freedom obtained from another method along with the coefficient, + number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") return(output) } @@ -138,8 +159,10 @@ konfound <- function(model_object, } if (test_all == FALSE) { - message("To consider other predictors of interest, consider setting `test_all` to TRUE.") + message("To consider other predictors of interest, + consider setting `test_all` to TRUE.") } else { - message("Note that presently these predictors of interest are tested independently; future output may use the approach used in mkonfound.") + message("Note that presently these predictors of interest are tested + independently; future output may use the approach used in mkonfound.") } } From 470657531ec1b087433d3b51c4863955285b860b Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 22:59:00 -0500 Subject: [PATCH 11/27] Update mkonfound.R Manual modification --- R/mkonfound.R | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/R/mkonfound.R b/R/mkonfound.R index 5f926de3..5bed869e 100644 --- a/R/mkonfound.R +++ b/R/mkonfound.R @@ -1,13 +1,19 @@ #' Perform 
meta-analyses including sensitivity analysis -#' @description For fitted models, this command carries out sensitivity analysis for a number of models, when their parameters stored in a data.frame. -#' @param d data.frame or tibble with the t-statistics and associated degrees of freedom +#' @description For fitted models, this command carries out sensitivity analysis +#' for a number of models, when their parameters stored in a data.frame. +#' @param d data.frame or tibble with the t-statistics and +#' associated degrees of freedom #' @param t t-statistic or vector of t-statistics -#' @param df degrees of freedom or vector of degrees of freedom associated with the t-statistics in the t argument -#' @param return_plot whether to return a plot of the percent bias; defaults to FALSE +#' @param df degrees of freedom or vector of degrees of +#' freedom associated with the t-statistics in the t argument +#' @param return_plot whether to return a plot of the percent bias; +#' defaults to FALSE #' @inheritParams pkonfound #' @import rlang #' @import dplyr -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference for each of the cases in the data.frame +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference +#' for each of the cases in the data.frame #' @examples #' \dontrun{ #' mkonfound_ex @@ -25,18 +31,23 @@ mkonfound <- function(d, t, df, alpha = .05, tails = 2, return_plot = FALSE) { df_vec <- pull(select(d, !!df_enquo)) if (length(t_vec) <= 1) { - stop("To carry out sensitivity analysis for a single study, use pkonfound()") + stop("To carry out sensitivity analysis + for a single study, use pkonfound()") } - results_df <- suppressWarnings(purrr::map2_dfr(.x = t_vec, .y = df_vec, .f = core_sensitivity_mkonfound)) - + results_df <- suppressWarnings( + purrr::map2_dfr( + .x = t_vec, 
.y = df_vec, .f = core_sensitivity_mkonfound) +) if (return_plot == TRUE) { results_df$action <- dplyr::case_when( results_df$action == "to_invalidate" ~ "To Invalidate", results_df$action == "to_sustain" ~ "To Sustain" ) - p <- ggplot2::ggplot(results_df, ggplot2::aes_string(x = "pct_bias_to_change_inference", fill = "action")) + + p <- ggplot2::ggplot(results_df, ggplot2::aes_string( + x = "pct_bias_to_change_inference", fill = "action" +)) + ggplot2::geom_histogram() + ggplot2::scale_fill_manual("", values = c("#1F78B4", "#A6CEE3")) + ggplot2::theme_bw() + @@ -86,8 +97,15 @@ core_sensitivity_mkonfound <- function(t, df, alpha = .05, tails = 2) { r_con <- round(sqrt(abs(itcv)), 3) out <- dplyr::data_frame(t, df, action, inference, pct_bias, itcv, r_con) - names(out) <- c("t", "df", "action", "inference", "pct_bias_to_change_inference", "itcv", "r_con") - out$pct_bias_to_change_inference <- round(out$pct_bias_to_change_inference, 3) + names(out) <- c("t", + "df", + "action", + "inference", + "pct_bias_to_change_inference", + "itcv", + "r_con") + out$pct_bias_to_change_inference <- round( + out$pct_bias_to_change_inference, 3) out$itcv <- round(out$itcv, 3) out$action <- as.character(out$action) out$inference <- as.character(out$inference) From c4cc779996cd7d91f19afab00631281b0d9dc76c Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:12:21 -0500 Subject: [PATCH 12/27] Update nonlinear_auxiliary.R All manual modifications, except for the calculation formula --- R/nonlinear_auxiliary.R | 548 +++++++++++++++++++++++++++------------- 1 file changed, 371 insertions(+), 177 deletions(-) diff --git a/R/nonlinear_auxiliary.R b/R/nonlinear_auxiliary.R index 6b60f6b6..9ba87925 100644 --- a/R/nonlinear_auxiliary.R +++ b/R/nonlinear_auxiliary.R @@ -8,7 +8,7 @@ isinvalidate <- function(thr_t, ob_t) { return(x) } -# to evaluate what kind of switches we need - increase or decrease the odds ratio +#to evaluate what kind 
of switches we need-increase or decrease the odds ratio isdcroddsratio <- function(thr_t, ob_t) { if (thr_t < ob_t) { x <- T @@ -18,7 +18,8 @@ isdcroddsratio <- function(thr_t, ob_t) { return(x) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - first solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - first solution get_a1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a1 <- -(1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (2 * n_trm * (-1 + odds_ratio) * odds_ratio + n_trm^2 * odds_ratio * std_err^2 - @@ -28,7 +29,8 @@ get_a1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(a1) } -# get the c cell (treatment failure) for given odds_ratio, se and treatment cases - first solution +# get the c cell (treatment failure) for given odds_ratio, +# se and treatment cases - first solution get_c1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { c1 <- -((-2 * n_trm + 2 * n_trm * odds_ratio - n_obs * n_trm * odds_ratio * std_err^2 + n_trm^2 * odds_ratio * std_err^2 + @@ -38,7 +40,8 @@ get_c1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(c1) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - second solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - second solution get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a2 <- (1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (-2 * n_trm * (-1 + odds_ratio) * odds_ratio - n_trm^2 * odds_ratio * std_err^2 + @@ -48,7 +51,8 @@ get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(a2) } -# get the c cell (treatment failure) for given odds_ratio, se and treatment cases - second solution +# get the c cell (treatment failure) for given odds_ratio, +# se and treatment cases - second solution get_c2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { c2 <- (2 * n_trm - 2 * n_trm 
* odds_ratio + n_obs * n_trm * odds_ratio * std_err^2 - n_trm^2 * odds_ratio * std_err^2 + @@ -75,8 +79,10 @@ taylorexp <- function(a, b, c, d, q, thr) { d1square <- Num1 / Den1 - Num2 / Den2 # d1unsquare is the first derivative of the unsquared term d1unsquare <- d1square / (2 * log(a * (d - q) / (b * (c + q))) / sqrt(1 / a + 1 / b + 1 / (c + q) + 1 / (d - q))) - # x is the number of cases need to be replaced solved based on the taylor expansion - # this is the (linear approximation) of the original/unsquared term around the value of q + # x is the number of cases need to be replaced + # solved based on the taylor expansion + # this is the (linear approximation) of the + # original/unsquared term around the value of q x <- (thr - log(a * (d - q) / (b * (c + q))) / sqrt((1 / a + 1 / b + 1 / (c + q) + 1 / (d - q)))) / d1unsquare + q return(x) } @@ -129,7 +135,9 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0 + ) } if (t_start > thr_t) { # transfer cases from A to B or D to C to decrease odds ratio @@ -139,7 +147,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) } ### run following if transfering one row is enough @@ -165,20 +174,23 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - 
taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -205,9 +217,11 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { t_loop <- t_taylor } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -263,7 +277,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { est_eff_final <- log(a_final * d_final / (b_final * c_final)) std_err_final <- sqrt(1 / a_final + 1 / b_final + 1 / c_final + 1 / d_final) t_final <- est_eff_final / std_err_final - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) if (switch_trm == allnotenough) { final <- abs(a - a_final) + as.numeric(allnotenough) * abs(c - c_final) } else { @@ -289,9 +304,12 @@ getswitch <- function(table_bstart, thr_t, 
switch_trm, n_obs) { ### return result result <- list( - final_switch = final, table_start = table_start, table_final = table_final, est_eff_start = est_eff_start, - est_eff_final = est_eff_final, std_err_start = std_err_start, std_err_final = std_err_final, - t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, perc_bias_pred = perc_bias_pred, + final_switch = final, table_start = table_start, + table_final = table_final, est_eff_start = est_eff_start, + est_eff_final = est_eff_final, std_err_start = std_err_start, + std_err_final = std_err_final, + t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, step = step, needtworows = allnotenough, final_extra = final_extra ) @@ -354,11 +372,13 @@ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- chisq_p(a, b, c, d) chisq_ob <- chisq_value(a, b, c, d) -# to evaluate whether we are moving cases to invalidate or sustain the inference +# to evaluate whether we are moving cases to +# invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} -# to evaluate what kind of switches we need - increase or decrease the odds ratio +# to evaluate what kind of switches we need-increase +#or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -388,7 +408,8 @@ if (!dcroddsratio_start) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_p <- chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -398,7 +419,8 @@ if (dcroddsratio_start ) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_p <- 
chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) } ### run following if transfering one row is enough @@ -424,20 +446,23 @@ if (!allnotenough) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many + ### and causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -465,9 +490,11 @@ if (!allnotenough) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } -### when we need to transfer two rows the previously defined tryall are the starting point for brute force +### when we need to transfer two rows the previously defined +### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point + ### and call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -487,8 +514,10 @@ if (t_loop < thr_t) { } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - 
as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -508,8 +537,10 @@ if (t_loop > thr_t) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -522,15 +553,21 @@ if (t_loop > thr_t) { p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) -### start 2nd round brute force - use fisher test p value as evaluation criterion -#### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p +### start 2nd round brute force - use fisher +###test p value as evaluation criterion +#### scenario 1 need to reduce odds ratio to +###invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- + d_loopsec - 1 * (1 - as.numeric(switch_trm == 
allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -540,41 +577,63 @@ if (isinvalidate_start & dcroddsratio_start){ } if (p_loopsec > thr_p){ #taylor too much, return some odds ratio while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p +#### scenario 2 need to reduce odds ratio to +#### sustain the inference-need to reduce p if (!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- 
a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- chisq_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- 
c_loopsec d_final <- d_loopsec @@ -583,14 +642,20 @@ if (!isinvalidate_start & dcroddsratio_start) { } } -#### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p +#### scenario 3 need to increase odds ratio to +#### invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -598,29 +663,42 @@ if (isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * 
as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p +#### scenario 4 need to increase odds ratio +#### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -628,23 +706,34 @@ if (!isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < 
thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: -table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) +table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2 +) p_final <- chisq_p(a_final, b_final, c_final, d_final) chisq_final <- chisq_value(a_final, b_final, c_final, d_final) @@ -669,11 +758,18 @@ if (allnotenough) { total_switch <- final + allnotenough*final_extra -result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, chisq_final = chisq_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = 
total_switch, isinvalidate_ob = isinvalidate_ob) +result <- list(final_switch = final, + User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + chisq_final = chisq_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) } @@ -693,11 +789,13 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- fisher_p(a, b, c, d) - # to evaluate whther we are moving cases to invalidate or sustain the inference + # to evaluate whther we are moving cases + # to invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} - # to evaluate what kind of switches we need - increase or decrease the odds ratio + # to evaluate what kind of switches + # we need-increase or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -732,7 +830,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0 + ) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -746,7 +846,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & 
tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0 + ) } ### run following if transfering one row is enough @@ -772,20 +874,25 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor < 0) { b_taylor <- a_taylor + b_taylor a_taylor <- 0 @@ -813,9 +920,11 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -839,8 +948,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - as.numeric(switch_trm 
== allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -864,8 +975,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -878,59 +991,94 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) - ### start 2nd round brute force - use fisher test p value as evaluation criterion - #### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p + ### start 2nd round brute force - use fisher test + ### p value as evaluation criterion + #### scenario 1 need to reduce odds ratio to + ### invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, 
d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){ #taylor too much, return some odds ratio + #taylor too much, return some odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p + #### scenario 2 need to reduce odds ratio + ### to sustain the inference-need to reduce p if 
(!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + 
c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec @@ -939,68 +1087,108 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } } - #### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p + #### scenario 3 need to increase odds + ### ratio to invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - 
as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p + #### scenario 4 need to increase odds ratio + #### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - 
p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * 
as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2 + ) p_final <- fisher_p(a_final, b_final, c_final, d_final) fisher_final <- fisher_oddsratio(a_final, b_final, c_final, d_final) @@ -1025,11 +1213,17 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ total_switch <- final + allnotenough*final_extra - result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, fisher_final = fisher_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = total_switch, isinvalidate_ob = isinvalidate_ob) + result <- list(final_switch = final, User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + fisher_final = fisher_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) -} \ No newline at end of file +} From 50da8c90f574cedb3e1de9753be515bfb23ed5cd Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:22:20 -0500 Subject: [PATCH 13/27] Update pkonfound.R Manual modification --- R/pkonfound.R | 85 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 26 deletions(-) diff --git a/R/pkonfound.R b/R/pkonfound.R index b2146218..19d699d5 100644 --- a/R/pkonfound.R +++ b/R/pkonfound.R @@ -1,34 +1,59 @@ #' Perform sensitivity analysis for published studies -#' @description For published studies, this command calculates (1) how much bias there must be in an estimate to 
invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. -#' @param est_eff the estimated effect (such as an unstandardized beta coefficient or a group mean difference) -#' @param std_err the standard error of the estimate of the unstandardized regression coefficient +#' @description For published studies, this command calculates +#' (1) how much bias there must be in an estimate to invalidate/sustain +#' an inference; (2) the impact of an omitted variable necessary to +#' invalidate/sustain an inference for a regression coefficient. +#' @param est_eff the estimated effect (such as an unstandardized beta +#' coefficient or a group mean difference) +#' @param std_err the standard error of the estimate of the unstandardized +#' regression coefficient #' @param n_obs the number of observations in the sample #' @param n_covariates the number of covariates in the regression model #' @param alpha probability of rejecting the null hypothesis (defaults to 0.05) -#' @param tails integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2) -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param nu what hypothesis to be tested; defaults to testing whether est_eff is significantly different from 0 -#' @param model_type the type of model being estimated; defaults to "ols" for a linear regression model; the other option is "logistic" -#' @param n_treat the number of cases associated with the treatment condition; applicable only when model_type = "logistic" -#' @param switch_trm whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic" -#' @param a cell is the number of cases in the control group showing unsuccessful results -#' @param b cell is the number of cases in the control group showing successful results -#' @param c cell is the number of cases in the treatment group 
showing unsuccessful results -#' @param d cell is the number of cases in the treatment group showing successful results -#' @param two_by_two_table table that is a matrix or can be coerced to one (data.frame, tibble, tribble) from which the a, b, c, and d arguments can be extracted -#' @param test whether using Fisher's Exact Test or A chi-square test; defaults to Fisher's Exact Test -#' @param replace whether using entire sample or the control group to calculate the base rate; default is the control group +#' @param tails integer whether hypothesis testing is one-tailed (1) or +#' two-tailed (2; defaults to 2) +#' @param index whether output is RIR or IT (impact threshold); +#' defaults to "RIR" +#' @param nu what hypothesis to be tested; defaults to testing whether +#' est_eff is significantly different from 0 +#' @param model_type the type of model being estimated; defaults to "ols" for +#' a linear regression model; the other option is "logistic" +#' @param n_treat the number of cases associated with the treatment condition; +#' applicable only when model_type = "logistic" +#' @param switch_trm whether to switch the treatment and control cases; defaults +#' to FALSE; applicable only when model_type = "logistic" +#' @param a cell is the number of cases in the control group showing +#' unsuccessful results +#' @param b cell is the number of cases in the control group showing successful +#' results +#' @param c cell is the number of cases in the treatment group showing +#' unsuccessful results +#' @param d cell is the number of cases in the treatment group showing +#' successful results +#' @param two_by_two_table table that is a matrix or can be coerced to one +#' (data.frame, tibble, tribble) from which the a, b, c, and d arguments can +#' be extracted +#' @param test whether using Fisher's Exact Test or A chi-square test; defaults +#' to Fisher's Exact Test +#' @param replace whether using entire sample or the control group to calculate +#' the base rate; 
default is the control group #' @param sdx the standard deviation of X #' @param sdy the standard deviation of Y #' @param R2 the unadjusted,original R2 in the observed function #' @param eff_thr unstandardized coefficient threshold to change an inference -#' @param FR2max the largest R2, or R2max, in the final model with unobserved confounder -#' @param FR2max_multiplier the multiplier of R2 to get R2max, default is set to 1.3 -#' @param to_return whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return +#' @param FR2max the largest R2, or R2max, in the final model with unobserved +#' confounder +#' @param FR2max_multiplier the multiplier of R2 to get R2max, +#' default is set to 1.3 +#' @param to_return whether to return a data.frame +#' (by specifying this argument to equal "raw_output" for use in other analyses) +#' or a plot ("plot"); default is to print ("print") the output to the console; +#' can specify a vector of output to return #' @importFrom stats fisher.test #' @import lavaan #' @import ggplot2 -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference #' @examples #' # using pkonfound for linear models #' pkonfound(2, .4, 100, 3) @@ -62,9 +87,11 @@ # pkonfound(two_by_two_table = my_table) # #' # use pkonfound to calculate delta* and delta_exact -#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") +#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, +#' eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") 
#' # use pkonfound to calculate rxcv and rycv when preserving standard error -#' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") +#' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, +#' sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") #' @export pkonfound <- function(est_eff, @@ -92,7 +119,8 @@ FR2max, FR2max_multiplier = 1.3, to_return = "print") { - if ("table" %in% to_return) stop("a table can only be output when using konfound") + if ("table" %in% to_return) stop("a table can only be + output when using konfound") if (index == "COP") { @@ -139,7 +167,10 @@ ) } else if (model_type == "logistic" & is.null(n_treat)) { - warning("For a logistic regression, the log-odds coefficients are used for the pkonfound() calculations. To carry out an analysis using average marginal effects, you can use the konfound() function with the results of a model estimated in R.") + warning("For a logistic regression, the log-odds coefficients + are used for the pkonfound() calculations. To carry out an + analysis using average marginal effects, you can use the + konfound() function with the results of a model estimated in R.") out <- test_sensitivity_ln( est_eff = est_eff, @@ -157,7 +188,8 @@ } else if(!is.null(a)) { # error handling if (is.null(a) | is.null(b) | is.null(c) | is.null(d)) { - stop("Please enter values for a, b, c, and d to use the 2 x 2 table functionality") + stop("Please enter values for a, b, c, + and d to use the 2 x 2 table functionality") } out <- tkonfound(a = a, @@ -208,7 +240,8 @@ if (!is.null(out)) { # dealing with a strange print issue } if (to_return == "print") { - message("For other forms of output, run ?pkonfound and inspect the to_return argument") + message("For other forms of output, run ? 
+ pkonfound and inspect the to_return argument") } message("For models fit in R, consider use of konfound().") From 76eb2a6ce939735e32f606ea5fbaadb7ebd84eef Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:27:32 -0500 Subject: [PATCH 14/27] Update test_cop.R Manual modification --- R/test_cop.R | 720 ++++++++++++++++++++++++++++----------------------- 1 file changed, 401 insertions(+), 319 deletions(-) diff --git a/R/test_cop.R b/R/test_cop.R index 5f94ff95..f07824f7 100644 --- a/R/test_cop.R +++ b/R/test_cop.R @@ -10,327 +10,409 @@ test_cop <- function(est_eff, # unstandardized R2, # NOT the adjusted R2, should be the original R2 eff_thr = 0, # this is the unstandardized version FR2max_multiplier = 1.3, - FR2max = 0, # NOT the adjusted R2, should be the original R2 + FR2max = 0, #NOT the adjusted R2, should be the original R2 alpha = 0.05, tails = 2, to_return = to_return){ - - ## test example - # est_eff = .125 - # std_err = .050 - # n_obs = 6174 - # n_covariates = 7 - # sdx = .217 - # sdy = .991 - # R2 = .251 - # eff_thr = 0 - # FR2max = .61 - # test_cop(est_eff = .4, std_err = .1, n_obs = 290, - # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short") - - ## prepare input - df = n_obs - n_covariates - 3 ## df of M3 - var_x = sdx^2 - var_y = sdy^2 - - ### if the user specifies R2max directly then we use the specified R2max - if (FR2max == 0){FR2max = FR2max_multiplier * R2} - var_z = sdz = 1 - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. 
Please specify a less complex model to use KonFound-It.")} - if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")} - if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} - - negest <- 0 # an indicator of whether the use specified est_eff is negative, 1 is yes negative - if (est_eff < 0) { - est_eff <- abs(est_eff) - negest <- 1 - } - - ## now standardize - beta_thr = eff_thr * sdx / sdy - beta = est_eff * sdx / sdy - SE = std_err * sdx / sdy - - ## observed regression, reg y on x Given z - tyxGz = beta / SE ### this should be equal to est_eff / std_err - ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) - ## df + 1 because omitted variable is NOT included in M2 - ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) - ## ryxGz_M2 is only for simulation to recover the exact number - - ## make sure R2 due to x alone is not larger than overall or observed R2 - if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} - - ## calculate ryz, rxz, rxy - ryz = rzy = cal_ryz(ryxGz, R2) - rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) - ## df + 1 because omitted variable is NOT included in M2 - #### we change the n_obs to df to recover the rxz as in the particular sample - - ## note that in the updated approach rxy is not necessary to calculate rxcv_exact, ryxcv_exact and delta_exact - rxy = ryx = cal_rxy(ryxGz, rxz, ryz) - rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) - ## rxy_M2 is only for simulation to recover the exact number - - ## baseline regression model, no z (unconditional) - eff_uncond = sqrt((var_y / var_x)) * rxy - t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) - ## n_obs - 2 - adjust the df in the M1 - std_err_uncond = eff_uncond / t_uncond - R2_uncond = rxy^2 - - ## calculate delta_star - delta_star = cal_delta_star(FR2max, R2, R2_uncond, est_eff, eff_thr, 
var_x, var_y, eff_uncond, rxz, n_obs) - - ## now introduce cv - sdcv = var_cv = 1 - rcvz = rzcv = 0 - v = 1 - rxz^2 # this is to simplify calculation later - D = sqrt(FR2max - R2) # same above - - ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) - rxcv_oster = rcvx_oster = delta_star * rxz * (sdcv / sdz) - if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) - {rcvy_oster = rycv_oster = - D * sqrt(1 - (rcvx_oster^2 / v)) + - (ryx * rcvx_oster) / (v) - - (ryz * rcvx_oster * rxz) / (v)} - - # Checking beta, R2, se generated by delta_star with a regression - verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) - - # prepare some other values in the final Table (long output) - R2_M3_oster = as.numeric(verify_oster[1]) - eff_x_M3_oster = as.numeric(verify_oster[2]) # should be equivalent or very close to eff_thr - se_x_M3_oster = as.numeric(verify_oster[3]) - beta_x_M3_oster = as.numeric(verify_oster[9]) # should be equivalent or very close to beta_thr - t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster - eff_z_M3_oster = as.numeric(verify_oster[4]) - se_z_M3_oster = as.numeric(verify_oster[5]) - eff_cv_M3_oster = as.numeric(verify_oster[6]) - se_cv_M3_oster = as.numeric(verify_oster[7]) - cov_oster = verify_oster[[11]] - cor_oster = verify_oster[[12]] - - ## calculate the exact/true rcvx, rcvy, delta (updated version that does not need rxy) - ### the idea is to calculate everything conditional on z - sdxGz = sdx * sqrt(1 - rxz^2) - sdyGz = sdy * sqrt(1 - ryz^2) - ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) - ### equation 7 in the manuscript - rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / - sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - - 2 * ryxGz * sdxGz / sdyGz * beta_thr + - ryxcvGz_exact_sq) - ### equation 6 in the manuscript - rycvGz_exact = ryxGz * rxcvGz_exact + - sqrt((ryxcvGz_exact_sq - ryxGz^2) * - (1 - rxcvGz_exact^2)) - ### now get unconditional exact rxcv and rycv - rycv_exact = 
sqrt(1 - ryz^2) * rycvGz_exact - rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact - delta_exact = rxcv_exact / rxz - - ## previous approach - comment out, but could find in cop_pse_auxiliary - ## exact_result = cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) - ## rxcv_exact = rcvx_exact = as.numeric(exact_result[1]) - ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) - ## delta_exact = as.numeric(exact_result[3]) - - # Checking beta, R2, se generated by True/Exact Delta with a regression - verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) - ### verify_exact[1] == verify_exact[4] == FR2max - ### verify_exact[2] == eff_thr - ### verify_exact[5] == beta_thr - - # calculate % bias in delta comparing oster's delta_star with true delta - delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact - - # prepare some other values in the final Table (long output) - R2_M3 = as.numeric(verify_exact[1]) - eff_x_M3 = as.numeric(verify_exact[2]) # should be equivalent or very close to eff_thr - se_x_M3 = as.numeric(verify_exact[3]) - beta_x_M3 = as.numeric(verify_exact[9]) # should be equivalent or very close to beta_thr - t_x_M3 = eff_x_M3 / se_x_M3 - eff_z_M3 = as.numeric(verify_exact[4]) - se_z_M3 = as.numeric(verify_exact[5]) - eff_cv_M3 = as.numeric(verify_exact[6]) - se_cv_M3 = as.numeric(verify_exact[7]) - cov_exact = verify_exact[[11]] - cor_exact = verify_exact[[12]] - - verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) - R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err - eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) - se_z_M2 = as.numeric(verify_pse_reg_M2[5]) - t_x_M2 = eff_x_M2 / se_x_M2 - - verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # 
should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx - se_x_M1 = as.numeric(verify_pse_reg_M1[3]) - t_x_M1 = eff_x_M1 / se_x_M1 - - delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - est_eff))* - ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) - - fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, beta_x_M3_oster, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models - # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, # reg coef for Z in three reg models - # NA, se_z_M2, se_z_M3, se_z_M3_oster, # se for Z in three reg models - # NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models, - NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models - NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models - nrow = 8, ncol = 4, byrow = T) - - rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", - # "coef_Z", "SE_Z", "t_Z", - "coef_CV", "SE_CV", "t_CV") - - colnames(fTable) <- c("M1:X", "M2:X,Z", - "M3(delta_exact):X,Z,CV", - "M3(delta*):X,Z,CV") - - ## figure - figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", - "Intermediate(M2)", eff_x_M2, R2, "exact", - "Final(M3)", eff_x_M3, FR2max, "exact", - "Intermediate(M2)", eff_x_M2, R2, "star", - "Final(M3)", eff_x_M3_oster, FR2max, "star"), nrow = 5, ncol = 4, byrow = T) - colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") - - figTable <- as.data.frame(figTable) - figTable$ModelLabel <- as.character(figTable$ModelLabel) - figTable$ModelLabel <- 
factor(figTable$ModelLabel, - levels = c("Baseline(M1)", - "Intermediate(M2)", - "Final(M3)")) - figTable$cat <- as.character(figTable$cat) - figTable$cat <- factor(figTable$cat, - levels = c("exact", - "star")) - figTable$coef_X <- as.numeric(figTable$coef_X) - figTable$R2 <- as.numeric(figTable$R2) - -scale = 1/(round(max(figTable$coef_X)*10)/10) - -fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + - ggplot2::geom_point(ggplot2::aes(y = coef_X, group = cat, shape = cat), color = "blue", size = 3) + - ggplot2::scale_shape_manual(values = c(16, 1)) + - ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + - # scale_linetype_manual(values=c("solid", "dotted")) + - ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") + - ggplot2::geom_line(ggplot2::aes(y = coef_X, group = cat, linetype = cat), color = "blue") + - ggplot2::scale_y_continuous( - # Features of the first axis - name = "Coeffcient (X)", - # Add a second axis and specify its features - sec.axis = ggplot2::sec_axis(~.* scale, - name="R2")) + - ggplot2::theme(axis.title.x = ggplot2::element_blank(), - legend.position = "none", - axis.line.y.right = ggplot2::element_line(color = "#7CAE00"), - axis.title.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.text.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.line.y.left = ggplot2::element_line(color = "blue"), - axis.title.y.left = ggplot2::element_text(color = "blue"), - axis.text.y.left = ggplot2::element_text(color = "blue"), - axis.line.x.bottom = ggplot2::element_line(color = "black"), - axis.text.x.bottom = ggplot2::element_text(color = "black")) - - ############################################## - ######### conditional RIR #################### - - # calculating critical r - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1 - } else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - 
n_covariates - 2) - } - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) - - # final solutions - cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / - (R2 - ryz^2 + ryz^2 * critical_r^2) - cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs - - cond_RIRpi_null <- 1 - sqrt(critical_r^2 / - (R2 - ryz^2 + ryz^2 * critical_r^2)) - cond_RIR_null <- cond_RIRpi_null * n_obs - - cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / - (R2 - ryz^2)) - cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs - - ############################################## - ######### output ############################# - - if (to_return == "raw_output") { - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## test example + # est_eff = .125 + # std_err = .050 + # n_obs = 6174 + # n_covariates = 7 + # sdx = .217 + # sdy = .991 + # R2 = .251 + # eff_thr = 0 + # FR2max = .61 + # test_cop(est_eff = .4, std_err = .1, n_obs = 290, + # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, + # FR2max = .8, to_return = "short") + + ## prepare input + df = n_obs - n_covariates - 3 ## df of M3 + var_x = sdx^2 + var_y = sdy^2 + + ### if the user specifies R2max directly then we use the specified R2max + if (FR2max == 0){FR2max = FR2max_multiplier * R2} + var_z = sdz = 1 + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater + than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to + be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.") + } + if (!(R2 < FR2max)) {stop("Did not run! 
R2 Max needs + to be greater than R2.")} + if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! Entered values produced Rxz^2 <=0, consider + adding more significant digits to your entered values.") + } + # an indicator of whether the use specified est_eff is + # negative, 1 is yes negative + negest <- 0 + if (est_eff < 0) { + est_eff <- abs(est_eff) + negest <- 1 } - output <- list("delta*" = delta_star, - "delta*restricted" = delta_star_restricted, - "delta_exact" = delta_exact, - "delta_pctbias" = delta_pctbias, - #"cov_oster" = cov_oster, - #"cov_exact" = cov_exact, - "cor_oster" = cor_oster, - "cor_exact" = cor_exact, - "var(Y)" = sdy^2, - "var(X)" = sdx^2, - #"var(Z)" = sdz^2, - "var(CV)" = sdcv^2, - "Table" = fTable, - "Figure" = fig, - "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, - "conditional RIR (fixed y)" = cond_RIR_fixedY, - "conditional RIR pi (null)" = cond_RIRpi_null, - "conditional RIR (null)" = cond_RIR_null, - "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, - "conditional RIR (rxyGz)" = cond_RIR_rxyz) - return(output) - } - - if (to_return == "print") { - cat("This function calculates delta* and the exact value of delta.") - cat("\n") - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## now standardize + beta_thr = eff_thr * sdx / sdy + beta = est_eff * sdx / sdy + SE = std_err * sdx / sdy + + ## observed regression, reg y on x Given z + tyxGz = beta / SE ### this should be equal to est_eff / std_err + ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) + ## df + 1 because omitted variable is NOT included in M2 + ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) + ## ryxGz_M2 is only for simulation to recover the exact number + + ## make sure R2 due to x alone is not larger than overall or observed R2 + if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} + + ## 
calculate ryz, rxz, rxy + ryz = rzy = cal_ryz(ryxGz, R2) + rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) + ## df + 1 because omitted variable is NOT included in M2 + ### we change the n_obs to df to recover the rxz as in the particular sample + + ## note that in the updated approach rxy is not necessary to calculate + ## rxcv_exact, ryxcv_exact and delta_exact + rxy = ryx = cal_rxy(ryxGz, rxz, ryz) + rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) + ## rxy_M2 is only for simulation to recover the exact number + + ## baseline regression model, no z (unconditional) + eff_uncond = sqrt((var_y / var_x)) * rxy + t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) + ## n_obs - 2 - adjust the df in the M1 + std_err_uncond = eff_uncond / t_uncond + R2_uncond = rxy^2 + + ## calculate delta_star + delta_star = cal_delta_star(FR2max, + R2, + R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + eff_uncond, + rxz, + n_obs) + + ## now introduce cv + sdcv = var_cv = 1 + rcvz = rzcv = 0 + v = 1 - rxz^2 # this is to simplify calculation later + D = sqrt(FR2max - R2) # same above + + ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) + rxcv_oster = rcvx_oster = delta_star * rxz * (sdcv / sdz) + if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) + {rcvy_oster = rycv_oster = + D * sqrt(1 - (rcvx_oster^2 / v)) + + (ryx * rcvx_oster) / (v) - + (ryz * rcvx_oster * rxz) / (v)} + + # Checking beta, R2, se generated by delta_star with a regression + verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) + + # prepare some other values in the final Table (long output) + R2_M3_oster = as.numeric(verify_oster[1]) + + eff_x_M3_oster = as.numeric(verify_oster[2]) + # should be equivalent or very close to eff_thr + se_x_M3_oster = as.numeric(verify_oster[3]) + beta_x_M3_oster = as.numeric(verify_oster[9]) + # should be equivalent or very close to beta_thr + t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster + eff_z_M3_oster = 
as.numeric(verify_oster[4]) + se_z_M3_oster = as.numeric(verify_oster[5]) + eff_cv_M3_oster = as.numeric(verify_oster[6]) + se_cv_M3_oster = as.numeric(verify_oster[7]) + cov_oster = verify_oster[[11]] + cor_oster = verify_oster[[12]] + + ## calculate the exact/true rcvx, rcvy, + ## delta (updated version that does not need rxy) + ### the idea is to calculate everything conditional on z + sdxGz = sdx * sqrt(1 - rxz^2) + sdyGz = sdy * sqrt(1 - ryz^2) + ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) + ### equation 7 in the manuscript + rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / + sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - + 2 * ryxGz * sdxGz / sdyGz * beta_thr + + ryxcvGz_exact_sq) + ### equation 6 in the manuscript + rycvGz_exact = ryxGz * rxcvGz_exact + + sqrt((ryxcvGz_exact_sq - ryxGz^2) * + (1 - rxcvGz_exact^2)) + ### now get unconditional exact rxcv and rycv + rycv_exact = sqrt(1 - ryz^2) * rycvGz_exact + rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact + delta_exact = rxcv_exact / rxz + + ## previous approach - comment out, but could find in cop_pse_auxiliary + ## exact_result = cal_delta_exact + ## (ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) + ## rxcv_exact = rcvx_exact = as.numeric(exact_result[1]) + ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) + ## delta_exact = as.numeric(exact_result[3]) + + # Checking beta, R2, se generated by True/Exact Delta with a regression + verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) + ### verify_exact[1] == verify_exact[4] == FR2max + ### verify_exact[2] == eff_thr + ### verify_exact[5] == beta_thr + + # calculate % bias in delta comparing oster's delta_star with true delta + delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact + + # prepare some other values in the final Table (long output) + R2_M3 = as.numeric(verify_exact[1]) + eff_x_M3 = as.numeric(verify_exact[2]) + # should be equivalent or very close to eff_thr + se_x_M3 = 
as.numeric(verify_exact[3]) + beta_x_M3 = as.numeric(verify_exact[9]) + # should be equivalent or very close to beta_thr + t_x_M3 = eff_x_M3 / se_x_M3 + eff_z_M3 = as.numeric(verify_exact[4]) + se_z_M3 = as.numeric(verify_exact[5]) + eff_cv_M3 = as.numeric(verify_exact[6]) + se_cv_M3 = as.numeric(verify_exact[7]) + cov_exact = verify_exact[[11]] + cor_exact = verify_exact[[12]] + + verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) + R2_M2 = as.numeric(verify_pse_reg_M2[1]) + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err + eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) + se_z_M2 = as.numeric(verify_pse_reg_M2[5]) + t_x_M2 = eff_x_M2 / se_x_M2 + + verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx + se_x_M1 = as.numeric(verify_pse_reg_M1[3]) + t_x_M1 = eff_x_M1 / se_x_M1 + + delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - est_eff))* + ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) + + fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, + # R2 for three reg models + eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, beta_x_M3_oster, + # std reg coef for X in three reg models + t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, + # t values for X in three reg models + + # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, + # reg coef for Z in three reg models + # NA, se_z_M2, se_z_M3, se_z_M3_oster, + # se for Z in three reg models +# NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, + # t for Z in three reg models, + NA, NA, eff_cv_M3, eff_cv_M3_oster, + # reg 
coef for CV in three reg models + NA, NA, se_cv_M3, se_cv_M3_oster, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3, + eff_cv_M3_oster / se_cv_M3_oster), + # t for CV in three reg models + nrow = 8, ncol = 4, byrow = T) + + rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", + # "coef_Z", "SE_Z", "t_Z", + "coef_CV", "SE_CV", "t_CV") + + colnames(fTable) <- c("M1:X", "M2:X,Z", + "M3(delta_exact):X,Z,CV", + "M3(delta*):X,Z,CV") + + ## figure + figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", + "Intermediate(M2)", eff_x_M2, R2, "exact", + "Final(M3)", eff_x_M3, FR2max, "exact", + "Intermediate(M2)", eff_x_M2, R2, "star", + "Final(M3)", eff_x_M3_oster, FR2max, "star"), + nrow = 5, ncol = 4, byrow = T) + colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") + + figTable <- as.data.frame(figTable) + figTable$ModelLabel <- as.character(figTable$ModelLabel) + figTable$ModelLabel <- factor(figTable$ModelLabel, + levels = c("Baseline(M1)", + "Intermediate(M2)", + "Final(M3)")) + figTable$cat <- as.character(figTable$cat) + figTable$cat <- factor(figTable$cat, + levels = c("exact", + "star")) + figTable$coef_X <- as.numeric(figTable$coef_X) + figTable$R2 <- as.numeric(figTable$R2) + + scale = 1/(round(max(figTable$coef_X)*10)/10) + + fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + + ggplot2::geom_point( + ggplot2::aes( + y = coef_X, + group = cat, + shape = cat + ), + color = "blue", + size = 3 + ) + + ggplot2::scale_shape_manual(values = c(16, 1)) + + ggplot2::geom_point( + ggplot2::aes( + y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + + # scale_linetype_manual(values=c("solid", "dotted")) + + ggplot2::geom_line( + ggplot2::aes( + y = R2/scale, group = cat), + linetype = "solid", color = "#7CAE00") + + ggplot2::geom_line( + ggplot2::aes( + y = coef_X, group = cat, linetype = cat), color = "blue") + + ggplot2::scale_y_continuous( + # Features of the first axis + name = "Coeffcient (X)", + # Add a 
second axis and specify its features + sec.axis = ggplot2::sec_axis(~.* scale, + name="R2")) + + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + legend.position = "none", + axis.line.y.right = ggplot2::element_line( + color = "#7CAE00"), + axis.title.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.text.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.line.y.left = ggplot2::element_line( + color = "blue"), + axis.title.y.left = ggplot2::element_text( + color = "blue"), + axis.text.y.left = ggplot2::element_text( + color = "blue"), + axis.line.x.bottom = ggplot2::element_line( + color = "black"), + axis.text.x.bottom = ggplot2::element_text( + color = "black")) + + ############################################## + ######### conditional RIR #################### + + # calculating critical r + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 2) * -1 + } else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) + } + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) + + # final solutions + cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / + (R2 - ryz^2 + ryz^2 * critical_r^2) + cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs + + cond_RIRpi_null <- 1 - sqrt(critical_r^2 / + (R2 - ryz^2 + ryz^2 * critical_r^2)) + cond_RIR_null <- cond_RIRpi_null * n_obs + + cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / + (R2 - ryz^2)) + cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs + + ############################################## + ######### output ############################# + + if (to_return == "raw_output") { + if (negest == 1) { + cat("Using the absolute value of the estimated + effect, results can be interpreted by symmetry.") + cat("\n") + } + output <- list("delta*" = delta_star, + "delta*restricted" = delta_star_restricted, + "delta_exact" = delta_exact, + "delta_pctbias" = delta_pctbias, + #"cov_oster" = cov_oster, + 
#"cov_exact" = cov_exact, + "cor_oster" = cor_oster, + "cor_exact" = cor_exact, + "var(Y)" = sdy^2, + "var(X)" = sdx^2, + #"var(Z)" = sdz^2, + "var(CV)" = sdcv^2, + "Table" = fTable, + "Figure" = fig, + "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, + "conditional RIR (fixed y)" = cond_RIR_fixedY, + "conditional RIR pi (null)" = cond_RIRpi_null, + "conditional RIR (null)" = cond_RIR_null, + "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, + "conditional RIR (rxyGz)" = cond_RIR_rxyz) + return(output) } - cat(sprintf("delta* is %.3f (assuming no covariates in the baseline model M1), the exact delta is %.3f, with a bias of %.3f%%", delta_star, delta_exact, delta_pctbias)) - cat("\n") - cat(sprintf("With delta*, the coefficient in the final model will be %.3f. With the exact delta, the coefficient will be %.3f.", - eff_x_M3_oster,eff_x_M3)) - cat("\n") - cat("Use to_return = raw_ouput to see more specific results and graphic presentation of the result.") - cat("\n") - cat("\n") - cat("This function also calculates conditional RIR that invalidates the statistical inference.") - cat("\n") - cat("If the replacement cases have a fixed value, then RIR =", cond_RIR_fixedY) - cat("\n") - cat("If the replacement cases follow a null distribution, then RIR =", cond_RIR_null) - cat("\n") - cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", cond_RIR_rxyz) - cat("\n") - cat("\n") - } - -} \ No newline at end of file + + if (to_return == "print") { + cat("This function calculates delta* and the exact value of delta.") + cat("\n") + if (negest == 1) { + cat("Using the absolute value of the estimated effect, + results can be interpreted by symmetry.") + cat("\n") + } + cat(sprintf("delta* is %.3f (assuming no covariates in the baseline + model M1), the exact delta is %.3f, with a bias of %.3f%%", + delta_star, delta_exact, delta_pctbias)) + cat("\n") + cat(sprintf("With delta*, the coefficient in the final model will be + %.3f. 
With the exact delta, the coefficient will be %.3f.", + eff_x_M3_oster,eff_x_M3)) + cat("\n") + cat("Use to_return = raw_ouput to see more specific results and graphic + presentation of the result.") + cat("\n") + cat("\n") + cat("This function also calculates conditional RIR that invalidates the + statistical inference.") + cat("\n") + cat("If the replacement cases have a fixed value, then RIR =", + cond_RIR_fixedY) + cat("\n") + cat("If the replacement cases follow a null distribution, then RIR =", + cond_RIR_null) + cat("\n") + cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", + cond_RIR_rxyz) + cat("\n") + cat("\n") + } + +} From b5e431d46b063f754546ad0254207d6a47392bdc Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:34:16 -0500 Subject: [PATCH 15/27] Update test_pse.R Manual modification --- R/test_pse.R | 94 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 31 deletions(-) diff --git a/R/test_pse.R b/R/test_pse.R index 0bdb147f..ba8ea77b 100644 --- a/R/test_pse.R +++ b/R/test_pse.R @@ -17,13 +17,21 @@ test_pse <- function(est_eff, df = n_obs - n_covariates - 3 ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x + needs to be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! 
Standard deviation of y + needs to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.")} if (!(0 < R2)) {stop("Did not run! R2 needs to be greater than zero.")} if (!(R2 < 1)) {stop("Did not run! R2 needs to be less than one.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! Entered values produced Rxz^2 <=0, + consider adding more significant digits to your entered values.")} ## now standardize @@ -51,19 +59,24 @@ test_pse <- function(est_eff, rxcvGz = as.numeric(Gz_pse[1]) rycvGz = as.numeric(Gz_pse[2]) - # convert conditional correlations to unconditional correlations to be used in new regression + # convert conditional correlations to unconditional + # correlations to be used in new regression rxcv = rxcvGz * sqrt((1 - rcvz^2) * (1 - rxz^2)) + rxz * rcvz rycv = rycvGz * sqrt((1 - rcvz^2) * (1 - rzy^2)) + rzy * rcvz - verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, rzy, rycv, rxcv, rcvz) - verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, rzcv, sdy, sdx) + verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, + rzy, rycv, rxcv, rcvz) + verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, + rzcv, sdy, sdx) cov_pse = verify_pse_reg_M3[[11]] # prepare some other values in the final Table (long output) R2_M3 = as.numeric(verify_pse_reg_M3[1]) - eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) # should be equivalent or very close to eff_thr + eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) + # should be equivalent or very close to eff_thr se_x_M3 = as.numeric(verify_pse_reg_M3[3]) - beta_x_M3 = 
as.numeric(verify_pse_reg_M3[9]) # should be equivalent or very close to thr + beta_x_M3 = as.numeric(verify_pse_reg_M3[9]) + # should be equivalent or very close to thr t_x_M3 = eff_x_M3 / se_x_M3 eff_z_M3 = as.numeric(verify_pse_reg_M3[4]) se_z_M3 = as.numeric(verify_pse_reg_M3[5]) @@ -72,30 +85,44 @@ test_pse <- function(est_eff, verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy, rxz, rzy) R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) se_z_M2 = as.numeric(verify_pse_reg_M2[5]) t_x_M2 = eff_x_M2 / se_x_M2 verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx se_x_M1 = as.numeric(verify_pse_reg_M1[3]) t_x_M1 = eff_x_M1 / se_x_M1 fTable <- matrix(c(R2_M1, R2_M2, R2_M3, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, # t values for X in three reg models - NA, eff_z_M2, eff_z_M3, # reg coef for Z in three reg models - NA, se_z_M2, se_z_M3, # se for Z in three reg models - NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, # t for Z in three reg models, - NA, NA, 
eff_cv_M3, # reg coef for CV in three reg models - NA, NA, se_cv_M3, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3), # t for CV in three reg models - nrow = 11, ncol = 3, byrow = T) + eff_x_M1, eff_x_M2, eff_x_M3, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, + # std reg coef for X in three reg models + t_x_M1, t_x_M2, t_x_M3, + # t values for X in three reg models + NA, eff_z_M2, eff_z_M3, + # reg coef for Z in three reg models + NA, se_z_M2, se_z_M3, + # se for Z in three reg models + NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, + # t for Z in three reg models, + NA, NA, eff_cv_M3, + # reg coef for CV in three reg models + NA, NA, se_cv_M3, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3), + # t for CV in three reg models + nrow = 11, ncol = 3, byrow = T) rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", "coef_Z", "SE_Z", "t_Z", @@ -114,17 +141,22 @@ test_pse <- function(est_eff, } if (to_return == "print") { - cat("This function calculates the conditions that set the estimated effect approximately equal to the threshold while preserving the standard error.") + cat("This function calculates the conditions that set the + estimated effect approximately equal to the threshold + while preserving the standard error.") cat("\n") - cat(sprintf("The correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcv, rycv)) + cat(sprintf("The correlation between X and CV is %.3f, + and the correlation between Y and CV is %.3f.", rxcv, rycv)) cat("\n") - cat(sprintf("Conditional on the covariates, the correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcvGz, rycvGz)) + cat(sprintf("Conditional on the covariates, the correlation between X + and CV is %.3f, and the correlation between Y and CV + is %.3f.", rxcvGz, rycvGz)) cat("\n") - cat(sprintf("Including such CV, the 
coefficient changes to %.3f, and standard error is %.3f.", eff_x_M3, se_x_M3)) + cat(sprintf("Including such CV, the coefficient changes to %.3f, + and standard error is %.3f.", eff_x_M3, se_x_M3)) cat("\n") cat("Use to_return = raw_ouput to see more specific results.") } - + } - \ No newline at end of file From 91b62745d53fea1e67b7663dd3599803430db5b2 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:40:06 -0500 Subject: [PATCH 16/27] Update test_sensitivity.R Manual modification --- R/test_sensitivity.R | 261 +++++++++++++++++++++++-------------------- 1 file changed, 142 insertions(+), 119 deletions(-) diff --git a/R/test_sensitivity.R b/R/test_sensitivity.R index 8a160d30..35150591 100644 --- a/R/test_sensitivity.R +++ b/R/test_sensitivity.R @@ -1,25 +1,27 @@ # helpers for the core sensitivity analysis function create_konfound_class <- function(x) { - structure(x, class = "konfound") + structure(x, class = "konfound") } #' Concise summary of konfound output -#' @details Prints a concise summary of konfound output with multiple types of data specified in the to_return argument +#' @details Prints a concise summary of konfound output with multiple types +#' of data specified in the to_return argument #' @param object A `konfound` object #' @param ... Additional arguments #' @export summary.konfound <- function(object, ...) { - cat("Created", length(object), "forms of output. To access type: \n") - cat("\n") - - for (name in names(object)) { - cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") - } + cat("Created", length(object), "forms of output. 
To access type: \n") + cat("\n") + + for (name in names(object)) { + cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") + } } -# Main function to test sensitivity to be wrapped with pkonfound(), konfound(), and mkonfound() +# Main function to test sensitivity to be wrapped with pkonfound(), +# konfound(), and mkonfound() test_sensitivity <- function(est_eff, std_err, @@ -32,114 +34,135 @@ test_sensitivity <- function(est_eff, to_return, model_object, tested_variable) { - if (nu != 0) warning("You entered a non-zero null hypothesis about an effect; this is being interpreted in terms of a partial correlation. Sampling variability is not accounted for.") - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} - - # calculating statistics used in every case - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } - else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - beta_threshold <- critical_t * std_err - # dealing with cases where hypotheses other than whether est_eff differs from 0 - if (nu != 0) { - est_eff <- abs(est_eff - nu) - } else { - est_eff <- est_eff - 0 - } # this is just to make what this is doing evident - - # for replacement of cases approach - - # calculating percentage of effect and number of observations to sustain or invalidate inference - if (abs(est_eff) > abs(beta_threshold)) { - bias <- 100 * (1 - (beta_threshold / est_eff)) - recase <- round(n_obs * (bias / 100)) - } - else if (abs(est_eff) < abs(beta_threshold)) { - sustain <- 100 * (1 - (est_eff / beta_threshold)) - recase <- round(n_obs * (sustain / 100)) - } - else if (est_eff == beta_threshold) { - stop("The 
coefficient is exactly equal to the threshold.") - } - - # for correlation-based approach - - # transforming t into r - obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + ((est_eff / std_err)^2))) - # finding critical r - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) - # calculating threshold - if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { - mp <- -1 - } else { - mp <- 1 - } - # calculating impact of the confounding variable - itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) - # finding correlation of confound to invalidate / sustain inference - r_con <- round(sqrt(abs(itcv)), 3) - - # if (component_correlations == FALSE){ - # rsq <- # has to come from some kind of model object - # varY <- # has to come from some kind of model object - # varX <- # has to come from some kind of model object - # sdX <- # has to come from some kind of model object - # - # rsqYZ = (((obs_r ^ 2) - Rsq) / ((obs_r ^ 2) - 1)) - # - # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) - # - # r_ycv = r_con * sqrt(1 - rsqYZ) - # r_xcv = r_con * sqrt(1 - rsqXZ) - # # before conditioning on observed covariates - # } - - # output dispatch - - if (length(to_return) > 1) { - to_return <- to_return[!(to_return == "print")] - - konfound_output <- purrr::map( - to_return, - ~ test_sensitivity( - est_eff = est_eff, - std_err = std_err, - n_obs = n_obs, - n_covariates = n_covariates, - alpha = alpha, - tails = tails, - nu = nu, - to_return = . - ) - ) - konfound_output <- create_konfound_class(konfound_output) - names(konfound_output) <- to_return - output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) - - cat("\n") - message(paste("Print output created by default. Created", length(konfound_output), "other forms of output. 
Use list indexing or run summary() on the output to see how to access.")) - - return(konfound_output) - } - - else if (to_return == "raw_output") { - return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, recase, obs_r, critical_r, r_con, itcv)) - } else if (to_return == "thresh_plot") { # this still makes sense for NLMs (just not quite as accurate) - return(plot_threshold(beta_threshold = beta_threshold, est_eff = est_eff)) - } else if (to_return == "corr_plot") { - return(plot_correlation(r_con = r_con, obs_r = obs_r, critical_r = critical_r)) - } else if (to_return == "print") { - return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index)) - } else if (to_return == "table") { - return(output_table(model_object, tested_variable)) - } else { - stop("to_return must be set to 'raw_output', 'print', 'table', 'thresh_plot', or 'corr_plot' or some combination thereof") - } + if (nu != 0) warning("You entered a non-zero null hypothesis about an + effect;this is being interpreted in terms of a partial + correlation.Sampling variability is not accounted for.") + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to + the number of observations and covariates. 
Please specify a + less complex model to use KonFound-It.")} + + # calculating statistics used in every case + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 3) * -1 + } + else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + beta_threshold <- critical_t * std_err + # dealing with cases where hypotheses other than whether + # est_eff differs from 0 + if (nu != 0) { + est_eff <- abs(est_eff - nu) + } else { + est_eff <- est_eff - 0 + } # this is just to make what this is doing evident + + # for replacement of cases approach + + # calculating percentage of effect and number of observations to + # sustain or invalidate inference + if (abs(est_eff) > abs(beta_threshold)) { + bias <- 100 * (1 - (beta_threshold / est_eff)) + recase <- round(n_obs * (bias / 100)) + } + else if (abs(est_eff) < abs(beta_threshold)) { + sustain <- 100 * (1 - (est_eff / beta_threshold)) + recase <- round(n_obs * (sustain / 100)) + } + else if (est_eff == beta_threshold) { + stop("The coefficient is exactly equal to the threshold.") + } + + # for correlation-based approach + + # transforming t into r + obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + + ((est_eff / std_err)^2))) + # finding critical r + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) + # calculating threshold + if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { + mp <- -1 + } else { + mp <- 1 + } + # calculating impact of the confounding variable + itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) + # finding correlation of confound to invalidate / sustain inference + r_con <- round(sqrt(abs(itcv)), 3) + + # if (component_correlations == FALSE){ + # rsq <- # has to come from some kind of model object + # varY <- # has to come from some kind of model object + # varX <- # has to come from some kind of model object + # sdX <- # has to come from some kind of model object + # + # rsqYZ = 
(((obs_r ^ 2) - Rsq) / ((obs_r ^ 2) - 1)) + # + # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / + # (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) + # + # r_ycv = r_con * sqrt(1 - rsqYZ) + # r_xcv = r_con * sqrt(1 - rsqXZ) + # # before conditioning on observed covariates + # } + + # output dispatch + + if (length(to_return) > 1) { + to_return <- to_return[!(to_return == "print")] + + konfound_output <- purrr::map( + to_return, + ~ test_sensitivity( + est_eff = est_eff, + std_err = std_err, + n_obs = n_obs, + n_covariates = n_covariates, + alpha = alpha, + tails = tails, + nu = nu, + to_return = . + ) + ) + konfound_output <- create_konfound_class(konfound_output) + names(konfound_output) <- to_return + output_print(est_eff, beta_threshold, bias, sustain, nu, + recase, obs_r, critical_r, r_con, itcv, alpha, index) + + cat("\n") + message(paste( + "Print output created by default. Created", length(konfound_output), + "other forms of output. Use list indexing or run summary() on the + output to see how to access.")) + + return(konfound_output) + } + + else if (to_return == "raw_output") { + return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, + recase, obs_r, critical_r, r_con, itcv)) + } else if (to_return == "thresh_plot") { + # this still makes sense for NLMs (just not quite as accurate) + return(plot_threshold(beta_threshold = beta_threshold, + est_eff = est_eff)) + } else if (to_return == "corr_plot") { + return(plot_correlation(r_con = r_con, obs_r = obs_r, + critical_r = critical_r)) + } else if (to_return == "print") { + return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, + obs_r, critical_r, r_con, itcv, alpha, index)) + } else if (to_return == "table") { + return(output_table(model_object, tested_variable)) + } else { + stop("to_return must be set to 'raw_output', 'print', 'table', + 'thresh_plot', or 'corr_plot' or some combination thereof") + } } From c049eef0b667b57a59df1369884fc9888c6bdfb3 Mon Sep 17 00:00:00 2001 From: 
wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:03:53 -0500 Subject: [PATCH 17/27] Update test_sensitivity_ln.R Manual modification --- R/test_sensitivity_ln.R | 688 +++++++++++++++++++++------------------- 1 file changed, 370 insertions(+), 318 deletions(-) diff --git a/R/test_sensitivity_ln.R b/R/test_sensitivity_ln.R index cca907ae..4d13bd05 100644 --- a/R/test_sensitivity_ln.R +++ b/R/test_sensitivity_ln.R @@ -1,5 +1,5 @@ -# Main function to test sensitivity for non-linear models to be wrapped with pkonfound(), konfound(), and mkonfound() - +# Main function to test sensitivity for non-linear models to +# be wrapped with pkonfound(), konfound(), and mkonfound() test_sensitivity_ln <- function(est_eff, std_err, n_obs, @@ -13,347 +13,399 @@ test_sensitivity_ln <- function(est_eff, to_return, model_object, tested_variable) { - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. 
Please specify a less complex model to use KonFound-It.")} - - if (est_eff < 0) { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } else { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - # stop message - if (n_obs <= 0 || n_treat <= 0) { - stop("Please enter positive integers for sample size and number of treatment group cases.") - } - if (n_obs <= n_treat) { - stop("The total sample size should be larger than the number of treatment group cases.") - } - - odds_ratio <- exp(est_eff) - - # updated approach to deal with imaginary - minse <- sqrt((4 * n_obs + - sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * - ((4 + 4 * odds_ratio^2) / odds_ratio - 7.999999)))/ - (2 * n_treat * (n_obs - n_treat))) - # check if the implied table solution may contain imaginary numbers - changeSE <- F - if (std_err < minse) { - haveimaginary <- T - changeSE <- T - user_std_err <- std_err - std_err <- minse - } + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. 
Please specify a less + complex model to use KonFound-It.")} - # n_treat is the number of observations in the treatment group (c+d) - # n_cnt is the number of observations in the control group (a+b) - n_cnt <- n_obs - n_treat - # t_ob is the t value calculated based on observed estimate and standard error - t_ob <- est_eff / std_err - # invalidate_ob is true - the observed result is significant - we are invalidating the observed result - invalidate_ob <- isinvalidate(thr_t, t_ob) - # dcroddsratio_ob is true - our goal is to decrease the odds ratio - dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) - - # previous approach to deal with imaginary - # to record the original treatment cases in case we need to adjust it - # user_ntrm <- n_treat - # check if the implied table solution may contain imaginary numbers - # haveimaginary <- F - # changepi <- F - # set the default value for whether we need and can adjust pi (ratio of treatment cases) - # to remove the imaginary part - # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * - # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + n_treat^2 * std_err^4)) - # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * (4 - 0.25 * n_obs * std_err^2)) - # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) - # if (keyimagin > 0) { + if (est_eff < 0) { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 + } else { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + # stop message + if (n_obs <= 0 || n_treat <= 0) { + stop("Please enter positive integers for sample size and number of + treatment group cases.") + } + if (n_obs <= n_treat) { + stop("The total sample size should be larger than the number of + treatment group cases.") + } + + odds_ratio <- exp(est_eff) + + # updated approach to deal with imaginary + minse <- sqrt((4 * n_obs + + sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * + ((4 + 4 * odds_ratio^2) / + odds_ratio - 
7.999999)))/ + (2 * n_treat * (n_obs - n_treat))) + # check if the implied table solution may contain imaginary numbers + changeSE <- F + if (std_err < minse) { + haveimaginary <- T + changeSE <- T + user_std_err <- std_err + std_err <- minse + } + + # n_treat is the number of observations in the treatment group (c+d) + # n_cnt is the number of observations in the control group (a+b) + n_cnt <- n_obs - n_treat + #t_ob is the t value calculated based on observed estimate and standard error + t_ob <- est_eff / std_err + # invalidate_ob is true - the observed result is significant + # - we are invalidating the observed result + invalidate_ob <- isinvalidate(thr_t, t_ob) + # dcroddsratio_ob is true - our goal is to decrease the odds ratio + dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) + + # previous approach to deal with imaginary + # to record the original treatment cases in case we need to adjust it + # user_ntrm <- n_treat + # check if the implied table solution may contain imaginary numbers + # haveimaginary <- F + # changepi <- F + # set the default value for whether we need and can adjust pi + # (ratio of treatment cases) + # to remove the imaginary part + # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * + # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + # + n_treat^2 * std_err^4)) + # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * + # (4 - 0.25 * n_obs * std_err^2)) + # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) + # if (keyimagin > 0) { # haveimaginary <- T # if (minimgain <= 0 && keyx1 > 0) { - # changepi <- T - # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) - # n_cnt <- n_obs - n_treat - # } else { - # stop("Cannot generate a usable contingency table; Please consider using the Pearson's chi-squared approach (under development).") - # } - #} - - # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table - a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, 
n_treat) - b1 <- n_cnt - a1 - c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) - d1 <- n_treat - c1 - - # a2, b2, c2, d2 are the second solution for the 4 cells in the contingency table - a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) - b2 <- n_cnt - a2 - c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) - d2 <- n_treat - c2 - - # Differences between these two sets of solutions: - ### a1 c1 are small while a2 c2 are large - ### b1 d1 are large while b2 d2 are small - ### the goal is to get fewest swithes to invalidate the inference - ### remove the solution if one cell has fewerer than 5 cases or negative cells or nan cells - check1 <- check2 <- TRUE - if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) - || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { - check1 <- FALSE - } - - if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) - || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { - check2 <- FALSE - } - - if (check1) { - table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) - solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - if (check2) { - table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) - solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - if (!check1 && !check2) { - stop("Cannot generate a usable contingency table!") - } - - # get the number of switches for solutions that satisfy the requirements - if (check1 && check2) { - final_switch1 <- solution1$final_switch - final_switch2 <- solution2$final_switch - if (final_switch1 < final_switch2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } else { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + # changepi <- T + # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) + # n_cnt <- n_obs - n_treat + # } else { + # stop("Cannot generate a usable contingency table; Please 
consider + # using the Pearson's chi-squared approach (under development).") + # } + #} + + # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table + a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, n_treat) + b1 <- n_cnt - a1 + c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) + d1 <- n_treat - c1 + + # a2, b2, c2, d2 are the second solution for the 4 cells in the + # contingency table + a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) + b2 <- n_cnt - a2 + c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) + d2 <- n_treat - c2 + + # Differences between these two sets of solutions: + ### a1 c1 are small while a2 c2 are large + ### b1 d1 are large while b2 d2 are small + ### the goal is to get fewest swithes to invalidate the inference + ### remove the solution if one cell has fewerer than 5 cases + ### or negative cells or nan cells + check1 <- check2 <- TRUE + if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && + n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) + || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { + check1 <- FALSE + } + + if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && + n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) + || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { + check2 <- FALSE + } + + if (check1) { + table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) + solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } + if (check2) { + table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) + solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + if (!check1 && !check2) { + stop("Cannot generate a usable contingency table!") + } + + # get the number of switches for solutions that satisfy the requirements + if (check1 && check2) { + final_switch1 <- solution1$final_switch + final_switch2 <- solution2$final_switch + if (final_switch1 < final_switch2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } else { + final_solution <- 
getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + } + + if (check1 && !check2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) } - } - - if (check1 && !check2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - - if (!check1 && check2) { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - - final <- final_solution$final_switch - a <- final_solution$table_start[1,1] - b <- final_solution$table_start[1,2] - c <- final_solution$table_start[2,1] - d <- final_solution$table_start[2,2] - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (final_solution$needtworows) { - final_extra <- final_solution$final_extra + + if (!check1 && check2) { + final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + + final <- final_solution$final_switch + a <- final_solution$table_start[1,1] + b <- final_solution$table_start[1,2] + c <- final_solution$table_start[2,1] + d <- final_solution$table_start[2,2] + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure 
to control success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && !dcroddsratio_ob) { - transferway_extra <- "control success to control failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (invalidate_ob) { - change <- "To invalidate the inference," - } else { - if (est_eff >= 0) { - change <- "To sustain an inference for a positive treatment effect," - } else { - change <- "To 
sustain an inference for a negative treatment effect," + + if (final_solution$needtworows) { + final_extra <- final_solution$final_extra + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - } - - if (!final_solution$needtworows & final_solution$final_switch > 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + + if (invalidate_ob) { + change <- "To invalidate the inference," } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + if (est_eff >= 0) { + change <- "To sustain an inference for a positive treatment effect," + } else { + change <- "To sustain an inference for a negative treatment effect," + } } - - conclusion1b <- paste( - sprintf("This is 
equivalent to transferring %d", final_solution$final_switch), - c("cases from"), transferway) - - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - } else if (!final_solution$needtworows & final_solution$final_switch == 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + if (!final_solution$needtworows & final_solution$final_switch > 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the + control group applies (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the entire + sample applies (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("cases from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." + + } else if (!final_solution$needtworows & final_solution$final_switch == 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf("with cases for which the probability of + failure in the control group applies + (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf("with cases for which the probability of + failure in the entire sample applies + (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("case from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." 
+ } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + conclusion1 <- paste( + change, c( + "only transferring cases from"), transferway, "is not enough.") + + conclusion1b <- paste(sprintf("We also need to transfer %d cases from", + final_solution$final_extra), + transferway_extra, c("as shown, from the + User-entered Table to the + Transfer Table.")) + + conclusion1c <- paste(sprintf("This means we need to replace %d of", + RIR), RIRway, + sprintf("with null hypothesis cases; and + replace %d", RIR_extra), RIRway_extra, + c("with null hypothesis cases to change + the inference.") + ) } - conclusion1b <- paste( - sprintf("This is equivalent to transferring %d", final_solution$final_switch), - c("case from"), transferway) - - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - - } else { - conclusion1 <- paste( - change, c("only transferring cases from"), transferway, "is not enough.") - - conclusion1b <- paste(sprintf("We also need to transfer %d cases from", final_solution$final_extra), - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + conclusion2 <- sprintf( + "For the Implied Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_start, final_solution$std_err_start, + final_solution$t_start + ) - conclusion1c <- paste(sprintf("This means we need to replace %d of", RIR), RIRway, - sprintf("with null hypothesis cases; and replace %d", RIR_extra), RIRway_extra, - c("with null hypothesis cases to change the inference.") + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_final, final_solution$std_err_final, + final_solution$t_final ) - } - - conclusion2 <- sprintf( - "For the Implied Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - 
final_solution$est_eff_start, final_solution$std_err_start, final_solution$t_start - ) - - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - final_solution$est_eff_final, final_solution$std_err_final, final_solution$t_final - ) - - notice <- "Note: Values have been rounded to the nearest integer." - noticeb <- "This may cause a little change to the estimated effect for the Implied Table." - - if (changeSE) { - notice_SE <- sprintf( - "In order to generate a usable implied contingency table, we increased the standard error to %.3f (the original one is %.3f).", - std_err, user_std_err) - } - - if (final_solution$needtworows) { - total_switch <- final_solution$final_switch+final_solution$final_extra - total_RIR <- RIR + RIR_extra - } else { - total_switch <- final_solution$final_switch - total_RIR <- RIR - } - - # result <- list(conclusion1, - # Implied_Table = final_solution$table_start, notice, Transfer_Table = final_solution$table_final, - # conclusion2, conclusion3, - # total_RIR = total_RIR, total_switch = total_switch - # ) - - # output dispatch - if (to_return == "raw_output") { + + notice <- "Note: Values have been rounded to the nearest integer." + noticeb <- + "This may cause a little change to the estimated + effect for the Implied Table." 
if (changeSE) { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR, - notice_SE) + notice_SE <- sprintf( + "In order to generate a usable implied contingency table, we + increased the standard error to %.3f (the original one is %.3f).", + std_err, user_std_err) + } + + if (final_solution$needtworows) { + total_switch <- final_solution$final_switch+final_solution$final_extra + total_RIR <- RIR + RIR_extra } else { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR) + total_switch <- final_solution$final_switch + total_RIR <- RIR } - - return(result) - } else if (to_return == "print") { + # result <- list(conclusion1, + # Implied_Table = final_solution$table_start, notice, + # Transfer_Table = final_solution$table_final, + # conclusion2, conclusion3, + # total_RIR = total_RIR, total_switch = total_switch + # ) - # cat(crayon::bold("Background Information:")) - # cat("\n") - # cat(info1) - # cat("\n") - # cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(final_solution$table_start) - cat("\n") - if (changeSE) { - cat(notice_SE) - cat("\n") - } - cat(notice) - cat(noticeb) - cat("\n") - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1a) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(final_solution$table_final) - cat("\n") - cat(conclusion2) - cat("\n") - cat(conclusion3) - cat("\n") - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") - - } + # output dispatch + if (to_return == "raw_output") { + + if (changeSE) { + result <- list(conclusion1, + 
conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR, + notice_SE) + } else { + result <- list(conclusion1, + conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR) + } + + return(result) + + } else if (to_return == "print") { + + # cat(crayon::bold("Background Information:")) + # cat("\n") + # cat(info1) + # cat("\n") + # cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(final_solution$table_start) + cat("\n") + if (changeSE) { + cat(notice_SE) + cat("\n") + } + cat(notice) + cat(noticeb) + cat("\n") + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1a) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(final_solution$table_final) + cat("\n") + cat(conclusion2) + cat("\n") + cat(conclusion3) + cat("\n") + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } } From 6991bf852b7e122302cbdfaf6d56e4ef1e51f5a9 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:45:59 -0500 Subject: [PATCH 18/27] Update tkonfound.R Manual modification --- R/tkonfound.R | 482 ++++++++++++++++++++++++++------------------------ 1 file changed, 252 insertions(+), 230 deletions(-) diff --git a/R/tkonfound.R b/R/tkonfound.R index 1433418f..95bac426 100644 --- a/R/tkonfound.R +++ b/R/tkonfound.R @@ -4,254 +4,276 @@ tkonfound <- function(a, b, c, d, test = "fisher", replace = "control", to_return = to_return){ - # a <- 35 - # b <- 17 - # c <- 17 - # d <- 38 - # alpha <- 0.05 - # switch_trm <- T - # test <- "fisher" - - # stop message - if (a < 0 || b < 0 || c < 0 || d < 0) { - 
stop("Please enter non-negative integers for each cell.") - } - - if (a != as.integer(a) || b != as.integer(b) || c != as.integer(c) || d != as.integer(d)) { - stop("Please enter non-negative integers for each cell.") - } - - # use fisher if any of the cell is smaller than 5 - if (a < 5 || b < 5 || c < 5 || d < 5){ - test <- "fisher" - } - - # odds_ratio <- a*d/(b*c) - n_cnt <- a+b - n_trm <- c+d - n_obs <- n_cnt + n_trm - # est <- log(odds_ratio) - - # this is the 2 by 2 table we start with - table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) - - if (test == "fisher") { - p_ob <- fisher_p(a, b, c, d) - fisher_ob <- fisher_oddsratio(a, b, c, d) - } - if (test == "chisq") { - p_ob <- chisq_p(a, b, c, d) - chisq_ob <- chisq_value(a, b, c, d) - } - - # get solution - if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) - chisq_final <- solution$chisq_final - } - - if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) - fisher_final <- solution$fisher_final - } - - table_final <- solution$Transfer_Table - table_start <- table_ob - dcroddsratio_ob <- solution$dcroddsratio_ob - allnotenough <- solution$needtworows - final <- solution$final_switch - p_final <- solution$p_final - taylor_pred <- solution$taylor_pred - perc_bias_pred <- solution$perc_bias_pred - final_extra <- solution$final_extra - total_switch <- solution$total_switch - - ### add column and row names to contingency tables - rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") - colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success" - RIR <- 
ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success" - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (allnotenough) { + # a <- 35 + # b <- 17 + # c <- 17 + # d <- 38 + # alpha <- 0.05 + # switch_trm <- T + # test <- "fisher" + + # stop message + if (a < 0 || b < 0 || c < 0 || d < 0) { + stop("Please enter non-negative integers for each cell.") + } + + if (a != as.integer(a) || b != as.integer(b) || + c != as.integer(c) || d != as.integer(d)) { + stop("Please enter non-negative integers for each cell.") + } + + # use fisher if any of the cell is smaller than 5 + if (a < 5 || b < 5 || c < 5 || d < 5){ + test <- "fisher" + } + + # odds_ratio <- a*d/(b*c) + n_cnt <- a+b + n_trm <- c+d + n_obs <- n_cnt + n_trm + # est <- log(odds_ratio) + + # this is the 2 by 2 table we start with + table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) + + if (test == "fisher") { + p_ob <- fisher_p(a, b, c, d) + fisher_ob <- fisher_oddsratio(a, b, c, d) + } + if (test == "chisq") { + p_ob <- chisq_p(a, b, c, d) + chisq_ob <- chisq_value(a, b, c, d) + } + + # get solution + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) + chisq_final <- solution$chisq_final + } + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) + fisher_final <- solution$fisher_final + } + + table_final <- solution$Transfer_Table + table_start <- table_ob + dcroddsratio_ob <- solution$dcroddsratio_ob + allnotenough <- solution$needtworows + 
final <- solution$final_switch + p_final <- solution$p_final + taylor_pred <- solution$taylor_pred + perc_bias_pred <- solution$perc_bias_pred + final_extra <- solution$final_extra + total_switch <- solution$total_switch + + ### add column and row names to contingency tables + rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") + colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure to control success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && !dcroddsratio_ob) { - transferway_extra <- "control success to control failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") 
+ - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (p_ob < alpha) { - change <- "To invalidate the inference, " - } else { - change <- "To sustain an inference, " - } - - if (!allnotenough & final > 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) - } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). ", RIR)) + if (allnotenough) { + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - conclusion1c <- paste0( - 
sprintf("This is equivalent to transferring %d", final), - " cases from ", transferway, "." - ) - } - - if (!allnotenough & final == 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) + if (p_ob < alpha) { + change <- "To invalidate the inference, " } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). ", RIR)) - } + change <- "To sustain an inference, " + } - conclusion1c <- paste0( - sprintf("This is equivalent to transferring %d", final), - " case from ", transferway, ".") - } - - if (allnotenough){ - conclusion1 <- paste( - change, c("only transferring cases from" ), transferway, - sprintf(" is not enough. We also need to transfer %d cases from ", final_extra)) + if (!allnotenough & final > 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the control group applies (RIR = %d). ", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the entire group applies (RIR = %d). ", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " cases from ", transferway, "." + ) + } - conclusion1b <- paste0( - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + if (!allnotenough & final == 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + control group applies (RIR = %d). 
", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + entire group applies (RIR = %d). ", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " case from ", transferway, ".") + } - conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", RIR), RIRway, - sprintf( "with null hypothesis cases; and replace %d ", RIR_extra), RIRway_extra, - c(" with null hypothesis cases to change the inference.")) - } - - if (test == "chisq"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_final, p_final) - } - - if (test == "fisher"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_final, p_final) - } - - info1 <- "This function calculates the number of cases that would have to be replaced" - info2 <- "with no effect cases (RIR) to invalidate an inference made about the association" - info3 <- "between the rows and columns in a 2x2 table." - info4 <- "One can also interpret this as switches from one cell to another, such as from" - info5 <- "the treatment success cell to the treatment failure cell." - - if (to_return == "raw_output") { + if (allnotenough){ + conclusion1 <- paste( + change, c("only transferring cases from" ), transferway, + sprintf(" is not enough. 
We also need to transfer %d cases from ", + final_extra)) + + conclusion1b <- paste0( + transferway_extra, c("as shown, from the User-entered Table + to the Transfer Table.")) + + conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", + RIR), RIRway, + sprintf( "with null hypothesis cases; and + replace %d ", RIR_extra), RIRway_extra, + c(" with null hypothesis cases to change + the inference.")) + } - result <- list(info1, - info2, - conclusion1, - conclusion1b, - conclusion1c, - User_enter_value = table_start, - Transfer_Table = table_final, - conclusion2, - conclusion3, - RIR = RIR) + if (test == "chisq"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_final, p_final) + } - return(result) + if (test == "fisher"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have an estimated odds ratio + of %.3f, with p-value of %.3f:", fisher_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimated odds ratio of + %.3f, with p-value of %.3f:", fisher_final, p_final) + } - } else if (to_return == "print") { + info1 <- "This function calculates the number of cases that + would have to be replaced" + info2 <- "with no effect cases (RIR) to invalidate an inference + made about the association" + info3 <- "between the rows and columns in a 2x2 table." + info4 <- "One can also interpret this as switches from one cell + to another, such as from" + info5 <- "the treatment success cell to the treatment failure cell." 
- cat(crayon::bold("Background Information:")) - cat("\n") - cat(info1) - cat("\n") - cat(info2) - cat("\n") - cat(info3) - cat("\n") - cat(info4) - cat("\n") - cat(info5) - cat("\n") - cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat(conclusion2) - cat("\n") - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(table_start) - cat("\n") - cat("\n") - cat(conclusion3) - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(table_final) - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") + if (to_return == "raw_output") { + + result <- list(info1, + info2, + conclusion1, + conclusion1b, + conclusion1c, + User_enter_value = table_start, + Transfer_Table = table_final, + conclusion2, + conclusion3, + RIR = RIR) + + return(result) + + } else if (to_return == "print") { + + cat(crayon::bold("Background Information:")) + cat("\n") + cat(info1) + cat("\n") + cat(info2) + cat("\n") + cat(info3) + cat("\n") + cat(info4) + cat("\n") + cat(info5) + cat("\n") + cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat(conclusion2) + cat("\n") + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(table_start) + cat("\n") + cat("\n") + cat(conclusion3) + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(table_final) + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } - } - } - - - From 1b9157e9bf6ff0eea897705d2b3b8c3b0f687959 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:54:32 -0500 Subject: [PATCH 19/27] Update tkonfound_fig.R Manual modification --- R/tkonfound_fig.R | 698 +++++++++++++++++++++++++--------------------- 1 file changed, 382 
insertions(+), 316 deletions(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 61c380e5..a91a11aa 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -1,339 +1,405 @@ -#' Draw figures for change in effect size as a function of switching or replacing outcomes -#' @description This function returns two plots for change in effect size as a function of switching or replacing outcomes, one for all possibilities (switching), another zoomed in the area for positive RIR -#' @param a cell is the number of cases in the control group showing unsuccessful results -#' @param b cell is the number of cases in the control group showing successful results -#' @param c cell is the number of cases in the treatment group showing unsuccessful results -#' @param d cell is the number of cases in the treatment group showing successful results -#' @param thr_p the p-value threshold used to evaluate statistical significance, with the default of 0.05 -#' @param switch_trm whether switching the two cells in the treatment row or the two cells in the control row, with the default of the treatment row +#' Draw figures for change in effect size as a function of switching +#' or replacing outcomes +#' @description This function returns two plots for change in effect size +#' as a function of switching or replacing outcomes, one for all +#' possibilities (switching), another zoomed in the area for positive RIR +#' @param a cell is the number of cases in the control group showing +#' unsuccessful results +#' @param b cell is the number of cases in the control group showing +#' successful results +#' @param c cell is the number of cases in the treatment group showing +#' unsuccessful results +#' @param d cell is the number of cases in the treatment group showing +#' successful results +#' @param thr_p the p-value threshold used to evaluate statistical +#' significance, with the default of 0.05 +#' @param switch_trm whether switching the two cells in the treatment row +#' or the two 
cells in the control row, with the default of the treatment row #' @param test whether using Fisher's Exact Test (default) or chi-square test -#' @param replace whether using entire sample or the control group to calculate the base rate, with the default of the control group +#' @param replace whether using entire sample or the control group to calculate +#' the base rate, with the default of the control group #' @importFrom stats chisq.test -#' @return prints 2 figures for how number of hypothetical cases switched changes the effect size +#' @return prints 2 figures for how number of hypothetical cases switched +#' changes the effect size #' @examples #' # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq", replace = "entire") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +#' test = "chisq") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +#' test = "chisq", replace = "entire") #' #' @export #' -tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, test = "fisher", replace = "control"){ - -n_obs <- a + b + c + d -###***generate the log odds for each step of switch -meta <- data.frame(matrix(ncol = 10, nrow = n_obs-3)) -colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", "logodds","cntrl_p","tr_p","pdif") -if (switch_trm == T) { - for (i in 1:(n_obs-3)){ - if (i <= a){ - #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) - meta$a[i] <- i - meta$b[i] <- a+b-i - meta$c[i] <- c+d-1 - meta$d[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(d-1+a-i) +tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, + test = "fisher", 
replace = "control"){ + + n_obs <- a + b + c + d + ###***generate the log odds for each step of switch + meta <- data.frame(matrix(ncol = 10, nrow = n_obs-3)) + colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", + "logodds","cntrl_p","tr_p","pdif") + if (switch_trm == T) { + for (i in 1:(n_obs-3)){ + if (i <= a){ + #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) + meta$a[i] <- i + meta$b[i] <- a+b-i + meta$c[i] <- c+d-1 + meta$d[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(d-1+a-i) + } + if (i>a & i<=a+d-1){ + # from table(a, b, c+d-1, 1) to table(a, b, c, d) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c+d-1-i+a + meta$d[i] <- 1+i-a + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+c-2){ + # from table(a, b, c, d) to table(a, b, 1, d+c-1) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c-(i-a-d+1) + meta$d[i] <- d+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+c-2 & i<=a+b+c+d-3){ + # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) + meta$a[i] <- a+(i-a-d-c+2) + meta$b[i] <- b-(i-a-d-c+2) + meta$c[i] <- 1 + meta$d[i] <- c+d-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a & i<=a+d-1){ - # from table(a, b, c+d-1, 1) to table(a, b, c, d) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c+d-1-i+a - meta$d[i] <- 1+i-a - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + if (switch_trm == F) { + for (i in 1:(n_obs-3)){ + if (i <= d){ + #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) + meta$d[i] <- i + meta$c[i] <- d+c-i + meta$b[i] <- b+a-1 + meta$a[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a-1+d-i) + } + if (i>d & i<=a+d-1){ + # from table(1, a+b-1, c, d) to table(a, b, c, d) + 
meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b+a-1-i+d + meta$a[i] <- 1+i-d + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+b-2){ + # from table(a, b, c, d) to table(a+b-1, 1, c, d) + meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b-(i-a-d+1) + meta$a[i] <- a+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+b-2 & i<=a+b+c+d-3){ + # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) + meta$d[i] <- d+(i-d-a-b+2) + meta$c[i] <- c-(i-a-d-b+2) + meta$b[i] <- 1 + meta$a[i] <- b+a-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a+d-1 & i<=a+d+c-2){ - # from table(a, b, c, d) to table(a, b, 1, d+c-1) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c-(i-a-d+1) - meta$d[i] <- d+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + meta$cntrl_p <- meta$b/(meta$a + meta$b) + meta$tr_p <- meta$d/(meta$c + meta$d) + meta$pdif <- meta$tr_p - meta$cntrl_p + + ###***find out significant thresholds + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) + meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) } - if (i>a+d+c-2 & i<=a+b+c+d-3){ - # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) - meta$a[i] <- a+(i-a-d-c+2) - meta$b[i] <- b-(i-a-d-c+2) - meta$c[i] <- 1 - meta$d[i] <- c+d-1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -if (switch_trm == F) { - for (i in 1:(n_obs-3)){ - if (i <= d){ - #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) - meta$d[i] <- i - meta$c[i] <- d+c-i - meta$b[i] <- b+a-1 - meta$a[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a-1+d-i) + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) + for (i in 
1:(n_obs-3)){ + meta$logodds[i] <- log( + fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) + } } - if (i>d & i<=a+d-1){ - # from table(1, a+b-1, c, d) to table(a, b, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b+a-1-i+d - meta$a[i] <- 1+i-d - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + + table_final <- solution$Transfer_Table + dcroddsratio_ob <- solution$dcroddsratio_ob + + meta$sig <- ifelse(meta$a==table_final[1,1] & + meta$b==table_final[1,2] & meta$c==table_final[2,1] & + meta$d==table_final[2,2],1,0) + + if (meta[meta$sig==1,]$logodds > 0){ + if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio + posinsig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==posinsig,]$logodds+ + meta[meta$switch==(posinsig+1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+ + meta[meta$switch==(posinsig+1),]$pdif)/2 + zoom_lower <- -(posinsig + 2) + meta$sigpoint <- ifelse(meta$switch==(posinsig+1), + "positive","other") + } else {#from not sig to sig by increasing positive effect + possig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==possig,]$logodds+ + meta[meta$switch==(possig-1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+ + meta[meta$switch==(possig-1),]$pdif)/2 + zoom_lower <- -(possig + 2) + meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + } + # find out the row that is cloest to logodds of 0 but negative + temp1 <- meta[meta$logodds<0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_upper <- -(temp1$switch[j]-2) + meta$sigpoint <- 
ifelse(meta$switch==temp1$switch[j], + "negative",meta$sigpoint) } - if (i>a+d-1 & i<=a+d+b-2){ - # from table(a, b, c, d) to table(a+b-1, 1, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b-(i-a-d+1) - meta$a[i] <- a+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + + if (meta[meta$sig==1,]$logodds < 0){ + if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio + negsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==negsig,]$logodds+ + meta[meta$switch==(negsig+1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+ + meta[meta$switch==(negsig+1),]$pdif)/2 + zoom_upper <- -(negsig - 4) + meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") + } else { + # from sig to not sig by increasing odds ratio that is smaller than 1 + neginsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==neginsig,]$logodds+ + meta[meta$switch==(neginsig-1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+ + meta[meta$switch==(neginsig-1),]$pdif)/2 + zoom_upper <- -(neginsig - 2) + meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") + } + # find out the row that is cloest to logodds of 0 but positive + temp1 <- meta[meta$logodds>0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_lower <- -(temp1$switch[j]+2) + meta$sigpoint <- ifelse(meta$switch==temp1$switch[j], + "positive",meta$sigpoint) } - if (i>a+d+b-2 & i<=a+b+c+d-3){ - # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) - meta$d[i] <- d+(i-d-a-b+2) - meta$c[i] <- c-(i-a-d-b+2) - meta$b[i] <- 1 - meta$a[i] <- b+a-1 - 
meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -meta$cntrl_p <- meta$b/(meta$a + meta$b) -meta$tr_p <- meta$d/(meta$c + meta$d) -meta$pdif <- meta$tr_p - meta$cntrl_p - -###***find out significant thresholds -if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) - meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) -} - -if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) - for (i in 1:(n_obs-3)){ - meta$logodds[i] <- log(fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) - } -} - -table_final <- solution$Transfer_Table -dcroddsratio_ob <- solution$dcroddsratio_ob - -meta$sig <- ifelse(meta$a==table_final[1,1] & meta$b==table_final[1,2] & meta$c==table_final[2,1] & meta$d==table_final[2,2], - 1,0) - -if (meta[meta$sig==1,]$logodds > 0){ - if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio - posinsig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==posinsig,]$logodds+meta[meta$switch==(posinsig+1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+meta[meta$switch==(posinsig+1),]$pdif)/2 - zoom_lower <- -(posinsig + 2) - meta$sigpoint <- ifelse(meta$switch==(posinsig+1),"positive","other") - } else {#from not sig to sig by increasing positive effect - possig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==possig,]$logodds+meta[meta$switch==(possig-1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+meta[meta$switch==(possig-1),]$pdif)/2 - zoom_lower <- -(possig + 2) - meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + + meta$current <- ifelse(meta$switch==0, "current", "other") + meta$currentlabel <- ifelse(meta$switch==0, "current", NA) + meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) + + if (switch_trm && dcroddsratio_ob) { + # transferway <- "treatment success to treatment failure," + meta$switch <- meta$switch*(-1) } - # find out the 
row that is cloest to logodds of 0 but negative - temp1 <- meta[meta$logodds<0,] - temp1 <- temp1[order(abs(temp1$logodds)),] - j <- 1 - if (test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + if (!switch_trm && dcroddsratio_ob) { + #transferway <- "control failure to control success," + meta$switch <- meta$switch*(-1) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + fillcol <-c("current"="white", + "positive"="green4","negative"="red","other"="white") + pointshape <- c("current"=15,"other"=21) + + if (switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) } - } - neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_upper <- -(temp1$switch[j]-2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"negative",meta$sigpoint) -} - -if (meta[meta$sig==1,]$logodds < 0){ - if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio - negsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==negsig,]$logodds+meta[meta$switch==(negsig+1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+meta[meta$switch==(negsig+1),]$pdif)/2 - zoom_upper <- -(negsig - 4) - meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") - } else { # from sig to not sig by increasing odds ratio that is smaller than 1 - neginsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==neginsig,]$logodds+meta[meta$switch==(neginsig-1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+meta[meta$switch==(neginsig-1),]$pdif)/2 - zoom_upper <- -(neginsig - 2) - meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") - } - # find out the row that is cloest to logodds of 0 but positive - temp1 <- meta[meta$logodds>0,] - temp1 <- 
temp1[order(abs(temp1$logodds)),] - j <- 1 - if (test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if (switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if (!switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_lower <- -(temp1$switch[j]+2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"positive",meta$sigpoint) -} - -meta$current <- ifelse(meta$switch==0, "current", "other") -meta$currentlabel <- ifelse(meta$switch==0, "current", NA) -meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) - -if (switch_trm && dcroddsratio_ob) { - # transferway <- "treatment success to treatment failure," - meta$switch <- meta$switch*(-1) -} - -if (!switch_trm && dcroddsratio_ob) { - #transferway <- "control failure to control success," - meta$switch <- meta$switch*(-1) -} - -fillcol <-c("current"="white","positive"="green4","negative"="red","other"="white") -pointshape <- c("current"=15,"other"=21) - -if (switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} -if (switch_trm && !dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && !dcroddsratio_ob) { - meta$RIR <- 
ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} - -meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, ")", sep = "") - -fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ - ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1)+ - ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(meta[meta$switch==0,]$RIR, meta[meta$sigpoint=="negative",]$RIR, - meta[meta$sigpoint=="positive",]$RIR), - labels= c(meta[meta$switch==0,]$xaxis, meta[meta$sigpoint=="negative",]$xaxis, - meta[meta$sigpoint=="positive",]$xaxis)) + - ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), - #axis.text= ggplot2::element_text(size = 14), - panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] -zoom <- zoom[zoom$switch>=0,] - -if (switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=d,] -} -if (switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=c,] -} -if (!switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=a,] -} -if (!switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=b,] -} - -zoom$label <- ifelse(zoom$sigpoint=="positive", - paste("sig pos:RIR=", 
zoom[zoom$sigpoint=="positive",]$RIR),NA) -zoom$label <- ifelse(zoom$sigpoint=="negative", - paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative",]$RIR),zoom$label) -zoom$label <- ifelse(zoom$sigpoint=="current", - paste("current"),zoom$label) - -fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"), - size = 1)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(zoom$RIR[1], zoom$RIR[as.integer(length(zoom$RIR)/2)], - zoom$RIR[as.integer(length(zoom$RIR))]), - labels= c(zoom$xaxis[1], zoom$xaxis[as.integer(length(zoom$RIR)/2)], - zoom$xaxis[as.integer(length(zoom$RIR))])) + - ggplot2::theme(panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1) -} - -if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1) -} - -###plot figure 3 RIS% as sample size gets larger, using t statistic as the criterion -# if (plt3 == TRUE) { -# meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) -# colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", "RISperc_D") -# for (i in 1:11){ -# meta3$nobs[i] <- size <- 2^(i+5) -# meta3$a[i] <- a_i <- 
round(size/n_obs*a) -# meta3$b[i] <- b_i <- round(size/n_obs*b) -# meta3$c[i] <- c_i <- round(size/n_obs*c) -# meta3$d[i] <- d_i <- round(size/n_obs*d) -# table_i <- get_abcd_kfnl(a_i, b_i, c_i, d_i) -# thr_i_t <- stats::qt(1 - thr_p/2, size - 1) -# meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, switch_trm, size)$final_extra -# meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 -# } -# fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ -# geom_line(size = 1)+ -# geom_point(size = 2.5)+ -# labs(title = "RIS as % of Sample Size")+ -# scale_x_continuous(name="Sample Size", labels=scales::comma)+ -# scale_y_continuous(name="RIS%") + -# theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), -# axis.text = element_text(size = 12)) -#} - -if (switch_trm == T) { - note <- "A bend in line indicates switches from the control row because the treatment row was exhausted." - } else { - note <- "A bend in line indicates switches from the treatment row because the control row was exhausted." 
- } - -result <- list(fig1, note, fig2) - -return(result) + if (!switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) + } + + meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, ")", sep = "") + + fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string( + y="pdif", shape = "current",fill = "sigpoint"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ + ggplot2::geom_hline( + yintercept = pos_thr_pdif, + linetype = "dashed", color="green4", size = 1)+ + ggplot2::geom_hline( + yintercept = neg_thr_pdif, + linetype = "dashed", color="red", size = 1)+ + ggplot2::scale_y_continuous( + name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(meta[meta$switch==0,]$RIR, meta[ + meta$sigpoint=="negative",]$RIR, + meta[meta$sigpoint=="positive", + ]$RIR), + labels= c(meta[meta$switch==0,]$xaxis, + meta[meta$sigpoint=="negative", + ]$xaxis, + meta[meta$sigpoint=="positive", + ]$xaxis)) + + ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), + #axis.text= ggplot2::element_text(size = 14), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] + zoom <- zoom[zoom$switch>=0,] + + if (switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=d,] + } + if (switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=c,] + } + if (!switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=a,] + } + if 
(!switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=b,] + } + + zoom$label <- ifelse(zoom$sigpoint=="positive", + paste("sig pos:RIR=", zoom[zoom$sigpoint=="positive", + ]$RIR),NA) + zoom$label <- ifelse(zoom$sigpoint=="negative", + paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative", + ]$RIR),zoom$label) + zoom$label <- ifelse(zoom$sigpoint=="current", + paste("current"),zoom$label) + + fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill + = "sigpoint"), + size = 1)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggplot2::scale_y_continuous(name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(zoom$RIR[1], zoom$RIR[ + as.integer(length(zoom$RIR)/2)], + zoom$RIR[as.integer( + length(zoom$RIR))]), + labels= c(zoom$xaxis[1], zoom$xaxis[ + as.integer(length(zoom$RIR)/2)], + zoom$xaxis[as.integer( + length(zoom$RIR))])) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype + = "dashed", color="green4", size = 1) + } + + if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype + = "dashed", color="red", size = 1) + } + + ###plot figure 3 RIS% as sample size gets larger, + ### using t statistic as the criterion + # if (plt3 == TRUE) { + # meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) 
+ # colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", + # "RISperc_D") + # for (i in 1:11){ + # meta3$nobs[i] <- size <- 2^(i+5) + # meta3$a[i] <- a_i <- round(size/n_obs*a) + # meta3$b[i] <- b_i <- round(size/n_obs*b) + # meta3$c[i] <- c_i <- round(size/n_obs*c) + # meta3$d[i] <- d_i <- round(size/n_obs*d) + # table_i <- get_abcd_kfnl(a_i, b_i, c_i, d_i) + # thr_i_t <- stats::qt(1 - thr_p/2, size - 1) + # meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, + # switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, + # switch_trm, size)$final_extra + # meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 + # } + # fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ + # geom_line(size = 1)+ + # geom_point(size = 2.5)+ + # labs(title = "RIS as % of Sample Size")+ + # scale_x_continuous(name="Sample Size", labels=scales::comma)+ + # scale_y_continuous(name="RIS%") + + # theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), + # axis.text = element_text(size = 12)) + #} + + if (switch_trm == T) { + note <- "A bend in line indicates switches from the control + row because the treatment row was exhausted." + } else { + note <- "A bend in line indicates switches from the treatment row + because the control row was exhausted." 
+ } + + result <- list(fig1, note, fig2) + + return(result) } From 96e340d429d8bcc3c2b2ffaf3292212a718b13d9 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:56:23 -0500 Subject: [PATCH 20/27] Update zzz.R Manual modification --- R/zzz.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 32b5dce8..01d8f048 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,8 +1,14 @@ -## quiets concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "replace_null_cases", "percent_bias", "val")) +## quiets concerns (notes) of R CMD check re: the vars that are evaluated +## using non-standard evaluation +if (getRversion() >= "2.15.1") + utils::globalVariables(c("inference", "key", "replace_null_cases", + "percent_bias", "val")) .onAttach <- function(libname, pkgname) { - packageStartupMessage("Sensitivity analysis as described in Frank, Maroulis, Duong, and Kelcey (2013) and in Frank (2000).\nFor more information visit http://konfound-it.com.") + packageStartupMessage("Sensitivity analysis as described in Frank, + Maroulis, Duong, and Kelcey (2013) and + in Frank (2000).\nFor more information visit + http://konfound-it.com.") } #' Open interactive web application for konfound @@ -11,8 +17,10 @@ if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "rep #' @export launch_shiny <- function() { - utils::browseURL("http://konfound-it.com") + utils::browseURL("http://konfound-it.com") } -# addresses concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -# if (getRversion() >= "2.15.1") utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", "x", "y")) +# addresses concerns (notes) of R CMD check re: the vars that are +# evaluated using non-standard evaluation +# if (getRversion() >= "2.15.1") 
+# utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", "x", "y")) From c6b3a7069e7e4c75dbeed5d71fd8c71e14ad92a7 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:07:34 -0500 Subject: [PATCH 21/27] Update konfound.R line 162, there is a link can't break lines --- R/konfound.R | 226 +++++++++++++++++++++++++++------------------------ 1 file changed, 121 insertions(+), 105 deletions(-) diff --git a/R/konfound.R b/R/konfound.R index e9ea1a84..18fdce62 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -1,12 +1,22 @@ #' Perform sensitivity analysis on fitted models -#' @description For fitted models, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. Currently works for: models created with lm() (linear models). +#' @description For fitted models, this command calculates +#' (1) how much bias there must be in an estimate to invalidate/sustain +#' an inference; (2) the impact of an omitted variable necessary to +#' invalidate/sustain an inference for a regression coefficient. +#' Currently works for: models created with lm() (linear models). 
#' @param model_object output from a model (currently works for: lm) -#' @param tested_variable Variable associated with the unstandardized beta coefficient to be tested +#' @param tested_variable Variable associated with the unstandardized beta +#' coefficient to be tested #' @inheritParams pkonfound -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param two_by_two whether or not the tested variable is a dichotomous variable in a GLM; if so, the 2X2 table approach is used; only works for single variables at present (so test_all = TRUE will return an error) -#' @param test_all whether to carry out the sensitivity test for all of the coefficients (defaults to FALSE) -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +#' @param index whether output is RIR or IT (impact threshold); +#' defaults to "RIR" +#' @param two_by_two whether or not the tested variable is a dichotomous +#' variable in a GLM; if so, the 2X2 table approach is used; only works for +#' single variables at present (so test_all = TRUE will return an error) +#' @param test_all whether to carry out the sensitivity test for all of the +#' coefficients (defaults to FALSE) +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference #' @importFrom rlang .data #' @examples #' # using lm() for linear models @@ -32,7 +42,8 @@ #' konfound(m3, Days) #' } #' -#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, family = binomial(link = "logit")) +#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, +#' family = binomial(link = "logit")) #' konfound(m4, condition, two_by_two = TRUE, n_treat = 55) #' @@ -49,120 +60,125 @@ konfound <- function(model_object, n_treat = NULL, switch_trm = TRUE, replace = "control") { - - # Stop messages - if (!(class(model_object)[1] %in% 
c("lm", "glm", "lmerMod"))) { - stop("konfound() is currently implemented for models estimated with + + # Stop messages + if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { + stop("konfound() is currently implemented for models estimated with lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") - } - - if ("table" %in% to_return & test_all == TRUE){ - stop("cannot return a table when test_all is set to TRUE") - } - # Dealing with non-standard evaluation - - # dealing with non-standard evaluation - #(so unquoted names for tested_variable can be used) - tested_variable_enquo <- rlang::enquo(tested_variable) - tested_variable_string <- rlang::quo_name(tested_variable_enquo) - - # Dispatching based on class - if (class(model_object)[1] == "lm") { - output <- konfound_lm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) + } - if (is.null(output)) { - - } else { - return(output) + if ("table" %in% to_return & test_all == TRUE){ + stop("cannot return a table when test_all is set to TRUE") } - } - - if (inherits(model_object, "glm") & two_by_two == FALSE) { - message("Note that for a non-linear model, + # Dealing with non-standard evaluation + + # dealing with non-standard evaluation + #(so unquoted names for tested_variable can be used) + tested_variable_enquo <- rlang::enquo(tested_variable) + tested_variable_string <- rlang::quo_name(tested_variable_enquo) + + # Dispatching based on class + if (class(model_object)[1] == "lm") { + output <- konfound_lm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + if (is.null(output)) { + + } else { + return(output) + } + } + + if (inherits(model_object, "glm") & two_by_two == FALSE) { + message("Note that for a non-linear model, impact 
threshold should not be interpreted.") - message("Note that this is only an approximation. For exact results + message("Note that this is only an approximation. For exact results in terms of the number of cases that must be switched from treatment success to treatment failure to invalidate the inference see: - https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") - message("If a dichotomous independent variable is used, consider using + https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") + message("If a dichotomous independent variable is used, consider using the 2X2 table approach enabled with the argument two_by_two = TRUE") - output <- konfound_glm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return - ) + output <- konfound_glm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return + ) + + return(output) + } - return(output) - } - - if (inherits(model_object, "glm") & two_by_two == TRUE) { - - if(is.null(n_treat)){ - stop("Please provide a value for + if (inherits(model_object, "glm") & two_by_two == TRUE) { + + if(is.null(n_treat)){ + stop("Please provide a value for n_treat to use this functionality with a dichotomous predictor") - } - if (test_all == TRUE) { - stop("test_all = TRUE is not supported when two_by_two is specified") + } + if (test_all == TRUE) { + stop("test_all = TRUE is not supported + when two_by_two is specified") + } + output <- konfound_glm_dichotomous( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return, + n_treat = n_treat, + switch_trm = switch_trm, + replace = replace + ) + + return(output) + } - output <- konfound_glm_dichotomous( - model_object = model_object, - 
tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return, - n_treat = n_treat, - switch_trm = switch_trm, - replace = replace - ) - - return(output) - } - - if (inherits(model_object, "lmerMod")) { - output <- konfound_lmer( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) - - message("Note that the Kenward-Roger approximation is used to + if (inherits(model_object, "lmerMod")) { + output <- konfound_lmer( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + message("Note that the Kenward-Roger approximation is used to estimate degrees of freedom for the predictor(s) of interest. We are presently working to add other methods for calculating the degrees of freedom for the predictor(s) of interest. If you wish to use other methods now, consider those detailed here: https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. 
- You can then enter degrees of freedom obtained from another method along with the coefficient, - number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") + You can then enter degrees of freedom obtained from another method + along with the coefficient, + number of observations, and number of covariates to the pkonfound() + function to quantify the robustness of the inference.") + + return(output) + } - return(output) - } - - if (!("table" %in% to_return)) { - message("For more detailed output, consider setting `to_return` to table") - } - - if (test_all == FALSE) { - message("To consider other predictors of interest, + if (!("table" %in% to_return)) { + message("For more detailed output, + consider setting `to_return` to table") + } + + if (test_all == FALSE) { + message("To consider other predictors of interest, consider setting `test_all` to TRUE.") - } else { - message("Note that presently these predictors of interest are tested - independently; future output may use the approach used in mkonfound.") - } + } else { + message("Note that presently these predictors of interest are tested + independently; future output may use the approach used + in mkonfound.") + } } From 4b6cafc60758e390d65c75c88fe6ad002766dd09 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:13:40 -0500 Subject: [PATCH 22/27] Update cop_pse_auxiliary.R "R:222:235 R:248 over 80 characters cause of calculation formula" --- R/cop_pse_auxiliary.R | 109 ++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/R/cop_pse_auxiliary.R b/R/cop_pse_auxiliary.R index cc958b81..8425616f 100644 --- a/R/cop_pse_auxiliary.R +++ b/R/cop_pse_auxiliary.R @@ -21,9 +21,17 @@ cal_rxy <- function(ryxGz, rxz, ryz){ return(rxy) } -cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y, est_uncond, rxz, n_obs){ 
+cal_delta_star <- function(FR2max, + R2, R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + est_uncond, + rxz, + n_obs){ if (FR2max > .99) {FR2max = .99} - # if (FR2max < R2 + inci) {FR2max = R2 + inci} check with Ken what this means + # if (FR2max < R2 + inci) {FR2max = R2 + inci}check with Ken what this means if (FR2max > R2) {D = sqrt(FR2max - R2)} #elements for computing Oster's delta_star @@ -35,7 +43,8 @@ cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y t_x = var_x * (n_obs / (n_obs - 1)) * (1 - rxz^2) ## adjust df for var_x ## var_x is population variance, need sample variance from x - ## this adjustment is to get closer to what robomit generates as they run regression using the sample data + ## this adjustment is to get closer to what robomit generates as they + ## run regression using the sample data num1 = bt_m_b * rt_m_ro_t_syy * t_x num2 = bt_m_b * var_x * t_x * b0_m_b1^2 num3 = 2 * bt_m_b^2 * (t_x * b0_m_b1 * var_x) @@ -115,8 +124,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -130,18 +139,24 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[4,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- 
lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } - + #get regression based on true delta in terms of standardized coefficent cor.matrix <- matrix(c(1,rxy, rzy, rcvy, rxy, 1, rxz, rcvx, @@ -153,8 +168,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cor <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cor = F @@ -169,15 +184,21 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, # if model can be run, then run it if (class(flag_cor) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) std_R2 <- 1 - lavaan::parameterEstimates(fit)[4,]$est - std_betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - std_seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - std_betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - std_seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 
'beta2',]$se - std_betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - std_seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + std_betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + std_seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + std_betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + std_seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + std_betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + std_seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } if (class(flag_cor) == "lavaan" && class(flag_cov) == "lavaan") { @@ -247,8 +268,8 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -262,16 +283,20 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[3,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- 
lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - } - + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + } + if (class(flag_cov) == "lavaan") { result = list(R2, betaX, seX, betaZ, seZ) return(result) @@ -294,8 +319,8 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -309,12 +334,14 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[2,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se } if (class(flag_cov) == "lavaan") { @@ -324,5 +351,3 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ stop("Error!") } } - - From 49b663463d72501ca872e6fe35ca24c8f091b31d Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:14:43 -0500 Subject: [PATCH 
23/27] Update concord1.R Manual modification: no more than 80 characters per line of code --- R/concord1.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/concord1.R b/R/concord1.R index a406e2f1..9346f9a0 100644 --- a/R/concord1.R +++ b/R/concord1.R @@ -4,6 +4,7 @@ #' #' @docType data #' @name concord1 -#' @references Hamilton, Lawrence C. 1983. Saving water: A causal model of household conservation. Sociological Perspectives 26(4):355-374. +#' @references Hamilton, Lawrence C. 1983. Saving water: +#' A causal model of household conservation. Sociological Perspectives 26(4):355-374. #' @format A data.frame with 496 rows and 10 variables. -NULL \ No newline at end of file +NULL From 39f3418a22e86fe2412f1d31505edd3f4e467176 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:16:54 -0500 Subject: [PATCH 24/27] Update mkonfound-data.R Manual modification: no more than 80 characters per line of code --- R/mkonfound-data.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/mkonfound-data.R b/R/mkonfound-data.R index db063821..bf16f4f2 100644 --- a/R/mkonfound-data.R +++ b/R/mkonfound-data.R @@ -1,7 +1,9 @@ #' Example data for the mkonfound function #' -#' A dataset containing t and df values from example studies from Educational Evaluation -#' and Policy Analysis (as detailed in Frank et al., 2013): https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view +#' A dataset containing t and df values from example studies +#' from Educational Evaluation +#' and Policy Analysis (as detailed in Frank et al., 2013): +#' https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view #' #' @format A data frame with 30 rows and 2 variables: #' \describe{ @@ -9,5 +11,6 @@ #' \item{df}{degrees of freedom associated with the t value} #' ... 
#' } -#' @source \url{https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} -"mkonfound_ex" \ No newline at end of file +#' @source \url +#' {https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} +"mkonfound_ex" From 5aedde0cc225cc8deb5b7d31044f33b8b0db3a85 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:59:53 -0500 Subject: [PATCH 25/27] Update tkonfound_fig.R --- R/tkonfound_fig.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index a91a11aa..0add9b3d 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -199,7 +199,7 @@ tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, zoom_upper <- -(negsig - 4) meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") } else { - # from sig to not sig by increasing odds ratio that is smaller than 1 + # from sig to not sig by increasing odds ratio that is smaller than 1 neginsig <- meta[meta$sig==1,]$switch neg_thr <- (meta[meta$switch==neginsig,]$logodds+ meta[meta$switch==(neginsig-1),]$logodds)/2 From 2c428400c8d0f977c65002654f6f78cbfcce9424 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:00:39 -0500 Subject: [PATCH 26/27] Update tkonfound_fig.R --- R/tkonfound_fig.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 0add9b3d..4f7e5f65 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -199,7 +199,8 @@ tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, zoom_upper <- -(negsig - 4) meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") } else { - # from sig to not sig by increasing odds ratio that is smaller than 1 + # from sig to not sig by increasing odds ratio + # that is smaller than 1 neginsig <- meta[meta$sig==1,]$switch neg_thr <- (meta[meta$switch==neginsig,]$logodds+ 
meta[meta$switch==(neginsig-1),]$logodds)/2 From c8f8a9d0d8f7670a813ce7371f90206825359475 Mon Sep 17 00:00:00 2001 From: wwang93 Date: Thu, 11 Jan 2024 00:29:34 -0500 Subject: [PATCH 27/27] For solving importing package issues --- .DS_Store | Bin 12292 -> 14340 bytes DESCRIPTION | 18 ++++---- NAMESPACE | 69 +++++++++++++++++++++++++++++-- R/cop_pse_auxiliary.R | 58 ++++++++++++++++++++++++++ R/helper_output_dataframe.R | 17 +++++++- R/helper_output_print.R | 26 ++++++++++++ R/helper_output_table.R | 18 ++++++++ R/helper_plot_correlation.R | 14 +++++++ R/helper_plot_threshold.R | 16 +++++++ R/konfound-glm-dichotomous.R | 20 +++++++++ R/konfound-glm.R | 21 ++++++++++ R/konfound-lm.R | 18 ++++++++ R/konfound-lmer.R | 27 ++++++++++++ R/konfound.R | 53 ++++++++++++++---------- R/mkonfound-data.R | 3 +- R/mkonfound.R | 45 +++++++++++--------- R/nonlinear_auxiliary.R | 19 +++++++++ R/pkonfound.R | 2 + R/test_cop.R | 29 ++++++++++++- R/test_sensitivity.R | 3 ++ R/test_sensitivity_ln.R | 26 ++++++++++++ R/tkonfound.R | 29 +++++++++++++ R/tkonfound_fig.R | 53 +++++++++++++----------- R/zzz.R | 12 +++++- man/cal_delta_star.Rd | 46 +++++++++++++++++++++ man/cal_rxy.Rd | 21 ++++++++++ man/cal_rxz.Rd | 25 +++++++++++ man/cal_ryz.Rd | 19 +++++++++ man/chisq_p.Rd | 23 +++++++++++ man/concord1.Rd | 3 +- man/get_kr_df.Rd | 17 ++++++++ man/konfound.Rd | 36 +++++++++------- man/konfound_glm.Rd | 40 ++++++++++++++++++ man/konfound_glm_dichotomous.Rd | 45 ++++++++++++++++++++ man/konfound_lm.Rd | 40 ++++++++++++++++++ man/konfound_lmer.Rd | 39 ++++++++++++++++++ man/mkonfound.Rd | 29 +++++++------ man/mkonfound_ex.Rd | 6 ++- man/output_df.Rd | 49 ++++++++++++++++++++++ man/output_print.Rd | 57 +++++++++++++++++++++++++ man/output_table.Rd | 23 +++++++++++ man/pkonfound.Rd | 71 ++++++++++++++++++++++---------- man/plot_correlation.Rd | 23 +++++++++++ man/plot_threshold.Rd | 21 ++++++++++ man/summary.konfound.Rd | 3 +- man/test_cop.Rd | 59 ++++++++++++++++++++++++++ 
man/test_sensitivity_ln.Rd | 58 ++++++++++++++++++++++++++ man/tkonfound.Rd | 52 +++++++++++++++++++++++ man/tkonfound_fig.Rd | 32 ++++++++------ man/verify_reg_Gz.Rd | 29 +++++++++++++ man/verify_reg_uncond.Rd | 23 +++++++++++ man/zzz.Rd | 9 ++++ 52 files changed, 1347 insertions(+), 147 deletions(-) create mode 100644 man/cal_delta_star.Rd create mode 100644 man/cal_rxy.Rd create mode 100644 man/cal_rxz.Rd create mode 100644 man/cal_ryz.Rd create mode 100644 man/chisq_p.Rd create mode 100644 man/get_kr_df.Rd create mode 100644 man/konfound_glm.Rd create mode 100644 man/konfound_glm_dichotomous.Rd create mode 100644 man/konfound_lm.Rd create mode 100644 man/konfound_lmer.Rd create mode 100644 man/output_df.Rd create mode 100644 man/output_print.Rd create mode 100644 man/output_table.Rd create mode 100644 man/plot_correlation.Rd create mode 100644 man/plot_threshold.Rd create mode 100644 man/test_cop.Rd create mode 100644 man/test_sensitivity_ln.Rd create mode 100644 man/tkonfound.Rd create mode 100644 man/verify_reg_Gz.Rd create mode 100644 man/verify_reg_uncond.Rd create mode 100644 man/zzz.Rd diff --git a/.DS_Store b/.DS_Store index 8e9ac42330a57c000ef19ae1c9b61e999f7f1895..f2290d73fce3cfe94a08260f93bde6a00b9c23a6 100644 GIT binary patch delta 2020 zcmd5-O-vI(6n@ifrJ==^Aioj2;tvV5*a8MfJQOJhO^mT59=u3P8^E;PrUeqx$Pz&> zBnpf-4M<4PgBqK?nP^NvJb0=J7d0M?@u(!63oq zCPe@s7sn$$0585;@lu~oJAkqo#$mN(bG2rr+5ST)bKy6EAborKgz~d0 zXbP~}p~W}7x;Ap+lA+2YoSg6kl=hl?f`bjPg&v`aHVWjMX7)&$d~N4RLvNT3@IV2Z; z5mgOrXebCtNPz^m@O>ONKME<%F|xB!n#npim08@;vsA)vT)JR*dN`d;Ay{4kwTpeN zHW)%@Bj~33oIs~1Ar7&9eWFqA6Eq;s{NJ5KSr@Mm{?E>CDMFLaB6FGDDr8n|coK%5 zX6tXN2+k$X<5zdPuy$e~m551+VVqNYSUK*!ggc)G57t5muPD&Fbq<;QQPaq@+hR<< zS{&($5*AZzL|=AR_KLZvO)a`E?U(@TVPq~;$2*|@<_~cJt0Dvmu9RW&K02o(tUE8y zZeH(S(|+&*YE)Hra%Kon`YP delta 270 zcmZoEXh~3DU|?W$DortDV9)?EIe-{M3-ADmHUR1sDXKNjo2jC$;)IcCr^|Wo?Ij=Kd~%~k$3VDMd!)3N)?+! 
zl|30Z?^01=oouDWKUr1#h$Jh-a1CxC?TYMHpk?owC-a-=g6%&5Qa0I!N16xd8j!~r cfV9En43mkI4~VHxmRB{|%%pLL5o+u~093p<3IG5A diff --git a/DESCRIPTION b/DESCRIPTION index bb611795..b730d51a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,28 +32,30 @@ Imports: crayon, dplyr, ggplot2, - mice, + ggrepel, + lavaan, + lme4, + margins, + pbkrtest, purrr, rlang, + stats, tidyr, tibble, - lavaan + utils Suggests: - margins, - pbkrtest, devtools, forcats, knitr, - lme4, rmarkdown, roxygen2, testthat, - ggrepel, - covr + covr, + mice VignetteBuilder: knitr Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 URL: https://github.com/jrosen48/konfound BugReports: https://github.com/jrosen48/konfound/issues Depends: diff --git a/NAMESPACE b/NAMESPACE index 63e34ff2..03342dad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,72 @@ export(konfound) export(launch_shiny) export(mkonfound) export(pkonfound) +export(test_cop) +export(test_sensitivity_ln) +export(tkonfound) export(tkonfound_fig) -import(dplyr) -import(rlang) -importFrom(rlang,.data) +import(ggplot2) +import(lavaan) +importFrom(broom,glance) +importFrom(broom,tidy) +importFrom(broom.mixed,tidy) +importFrom(crayon,bold) +importFrom(crayon,italic) +importFrom(crayon,underline) +importFrom(dplyr,bind_cols) +importFrom(dplyr,case_when) +importFrom(dplyr,filter) +importFrom(dplyr,mutate) +importFrom(dplyr,pull) +importFrom(dplyr,rename) +importFrom(dplyr,select) +importFrom(dplyr,tibble) +importFrom(ggplot2,aes) +importFrom(ggplot2,aes_string) +importFrom(ggplot2,annotate) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_line) +importFrom(ggplot2,element_text) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,geom_col) +importFrom(ggplot2,geom_curve) +importFrom(ggplot2,geom_histogram) +importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,ggtitle) 
+importFrom(ggplot2,scale_fill_manual) +importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,sec_axis) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) +importFrom(ggplot2,theme_void) +importFrom(ggplot2,xlab) +importFrom(ggplot2,ylab) +importFrom(ggrepel,geom_label_repel) +importFrom(lavaan,parameterEstimates) +importFrom(lavaan,sem) +importFrom(lme4,fixef) +importFrom(lme4,lmer) +importFrom(margins,margins) +importFrom(pbkrtest,get_Lb_ddf) +importFrom(purrr,map) +importFrom(purrr,map2_dfr) +importFrom(purrr,map_dbl) +importFrom(purrr,modify_if) +importFrom(rlang,"!!") +importFrom(rlang,enquo) +importFrom(rlang,quo_name) importFrom(stats,chisq.test) +importFrom(stats,cor) importFrom(stats,fisher.test) +importFrom(stats,glm) +importFrom(stats,qt) +importFrom(tibble,tribble) +importFrom(tidyr,gather) +importFrom(utils,browseURL) +importFrom(utils,globalVariables) diff --git a/R/cop_pse_auxiliary.R b/R/cop_pse_auxiliary.R index 8425616f..483c104e 100644 --- a/R/cop_pse_auxiliary.R +++ b/R/cop_pse_auxiliary.R @@ -1,3 +1,9 @@ +#' Calculate R2yz based on ryxGz and R2 +#' +#' @param ryxGz correlation coefficient between Y and X given Z +#' @param R2 coefficient of determination +#' @return R2yz value +#' @importFrom lavaan parameterEstimates cal_ryz <- function(ryxGz, R2){ R2yz = (ryxGz^2 - R2)/(ryxGz^2 - 1) if (R2yz >= 0) { @@ -8,6 +14,15 @@ cal_ryz <- function(ryxGz, R2){ return(ryz) } +#' Calculate R2xz based on variances and standard error +#' +#' @param var_x variance of X +#' @param var_y variance of Y +#' @param R2 coefficient of determination +#' @param df degrees of freedom +#' @param std_err standard error +#' @return R2xz value +#' @importFrom lavaan parameterEstimates cal_rxz <- function(var_x, var_y, R2, df, std_err){ R2xz = 1 - ((var_y * (1 - R2))/(var_x * df * std_err^2)) if (R2xz <= 0) {stop("Error! 
R2xz < 0!")} @@ -16,11 +31,32 @@ cal_rxz <- function(var_x, var_y, R2, df, std_err){ return(rxz) } +#' Calculate rxy based on ryxGz, rxz, and ryz +#' +#' @param ryxGz correlation coefficient between Y and X given Z +#' @param rxz correlation coefficient between X and Z +#' @param ryz correlation coefficient between Y and Z +#' @return rxy value +#' @importFrom lavaan parameterEstimates cal_rxy <- function(ryxGz, rxz, ryz){ rxy = ryxGz * sqrt((1 - rxz^2)*(1 - ryz^2)) + rxz * ryz return(rxy) } +#' Calculate delta star for sensitivity analysis +#' +#' @param FR2max maximum R2 +#' @param R2 current R2 +#' @param R2_uncond unconditional R2 +#' @param est_eff estimated effect +#' @param eff_thr effect threshold +#' @param var_x variance of X +#' @param var_y variance of Y +#' @param est_uncond unconditional estimate +#' @param rxz correlation coefficient between X and Z +#' @param n_obs number of observations +#' @return delta star value +#' @importFrom lavaan parameterEstimates cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, @@ -101,6 +137,7 @@ cal_delta_star <- function(FR2max, # see test_cop for updated approach to calculate delta exact + verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, rzy, rcvy, rcvx, rcvz){ @@ -250,6 +287,18 @@ verify_manual <- function(rxy, rxz, rxcv, ryz, rycv, rzcv, sdy, sdx){ return(beta) } + +#' Verify regression model with control variable Z +#' +#' @param n_obs number of observations +#' @param sdx standard deviation of X +#' @param sdy standard deviation of Y +#' @param sdz standard deviation of Z +#' @param rxy correlation coefficient between X and Y +#' @param rxz correlation coefficient between X and Z +#' @param rzy correlation coefficient between Z and Y +#' @return list of model parameters +#' @importFrom lavaan sem parameterEstimates verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ model <- 'Y ~ beta1 * X + beta2 * Z' @@ -305,6 +354,15 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, 
rxz, rzy){ } } + +#' Verify unconditional regression model +#' +#' @param n_obs number of observations +#' @param sdx standard deviation of X +#' @param sdy standard deviation of Y +#' @param rxy correlation coefficient between X and Y +#' @return list of model parameters +#' @importFrom lavaan sem parameterEstimates verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ model <- 'Y ~ beta1 * X' diff --git a/R/helper_output_dataframe.R b/R/helper_output_dataframe.R index 8f6cdf40..deda4a7a 100644 --- a/R/helper_output_dataframe.R +++ b/R/helper_output_dataframe.R @@ -1,5 +1,20 @@ -# Function to output the data frame +#' Output data frame based on model estimates and thresholds +#' +#' @param est_eff estimated effect +#' @param beta_threshhold threshold for beta +#' @param unstd_beta unstandardized beta value +#' @param bias bias to change inference +#' @param sustain sustain to change inference +#' @param recase number of cases to replace null +#' @param obs_r observed correlation +#' @param critical_r critical correlation +#' @param r_con correlation for omitted variable +#' @param itcv inferential threshold for confounding variable +#' @param non_linear flag for non-linear models +#' @return data frame with model information +#' @importFrom dplyr tibble +# Function to output the data frame output_df <- function(est_eff, beta_threshhold, unstd_beta, diff --git a/R/helper_output_print.R b/R/helper_output_print.R index e2f944c8..824348b8 100644 --- a/R/helper_output_print.R +++ b/R/helper_output_print.R @@ -1,3 +1,29 @@ +#' Output printed text with formatting +#' +#' This function outputs printed text for various indices such as RIR +#' (Robustness of Inference to Replacement) +#' and IT (Impact Threshold for a Confounding Variable) with specific formatting +#' like bold, underline, and italic +#' using functions from the crayon package. 
It handles different scenarios based +#' on the effect difference, +#' beta threshold, and other parameters, providing formatted +#' output for each case. +#' +#' @param eff_diff The difference in the effect size being evaluated. +#' @param beta_threshhold The threshold value of beta, used for +#' statistical significance determination. +#' @param bias The percentage of the estimate that could be due to bias (optional). +#' @param sustain The percentage of the estimate necessary to sustain an inference (optional). +#' @param nu The hypothesized effect size used in replacement analysis. +#' @param recase The number of cases that need to be replaced to change the inference. +#' @param obs_r The observed correlation coefficient in the data. +#' @param critical_r The critical correlation coefficient for statistical significance. +#' @param r_con The correlation coefficient of an omitted variable with both the outcome and the predictor. +#' @param itcv The impact threshold for a confounding variable. +#' @param alpha The level of statistical significance. +#' @param index A character string indicating the index for which the output is generated ('RIR' or 'IT'). +#' @importFrom crayon bold underline italic + # Function to output printed text output_print <- function(eff_diff, diff --git a/R/helper_output_table.R b/R/helper_output_table.R index 254a7521..1f608da7 100644 --- a/R/helper_output_table.R +++ b/R/helper_output_table.R @@ -1,3 +1,21 @@ +#' Output a Tidy Table from a Model Object +#' +#' This function takes a model object and the tested variable, +#' tidies the model output using `broom::tidy`, +#' calculates the impact threshold for confounding variables (ITCV) and impact +#' for each covariate,and returns a rounded, tidy table of model outputs. +#' +#' @param model_object A model object from which to generate the output. +#' @param tested_variable The variable being tested in the model. 
+#' @return A tidy data frame containing model outputs, ITCV, +#' and impacts for covariates. +#' @importFrom broom tidy +#' @importFrom purrr modify_if +#' @importFrom stats cor +#' @importFrom dplyr select filter mutate +#' @importFrom rlang !! enquo + + # Function to output the data frame output_table <- function(model_object, tested_variable) { diff --git a/R/helper_plot_correlation.R b/R/helper_plot_correlation.R index 5a329b60..0bbce83d 100644 --- a/R/helper_plot_correlation.R +++ b/R/helper_plot_correlation.R @@ -1,3 +1,17 @@ +#' Plot Correlation Diagram +#' +#' This function creates a plot to illustrate the correlation between different +#' variables,specifically focusing on the confounding variable, predictor of +#' interest, and outcome.It uses ggplot2 for graphical representation. +#' +#' @param r_con Correlation coefficient related to the confounding variable. +#' @param obs_r Observed correlation coefficient. +#' @param critical_r Critical correlation coefficient for decision-making. +#' @return A ggplot object representing the correlation diagram. +#' @importFrom ggplot2 ggplot aes_string geom_segment geom_curve annotate +#' theme_void ggtitle + + # plot correlation plot_correlation <- function(r_con, obs_r, critical_r) { diff --git a/R/helper_plot_threshold.R b/R/helper_plot_threshold.R index 63b3de35..96506f38 100644 --- a/R/helper_plot_threshold.R +++ b/R/helper_plot_threshold.R @@ -1,3 +1,19 @@ +#' Plot Effect Threshold Diagram +#' +#' This function creates a plot to illustrate the threshold of an effect +#' estimate in relation to a specified beta threshold. It uses ggplot2 +#' for graphical representation. +#' +#' @param beta_threshold The threshold value for the effect. +#' @param est_eff The estimated effect size. +#' @return A ggplot object representing the effect threshold diagram. 
+#' @importFrom ggplot2 ggplot aes geom_col geom_hline annotate +#' scale_fill_manual theme_bw theme xlab ylab +#' @importFrom dplyr tibble mutate rename select filter pull +#' @importFrom tidyr gather + + + # Function to output the plot plot_threshold <- function(beta_threshold, est_eff) { diff --git a/R/konfound-glm-dichotomous.R b/R/konfound-glm-dichotomous.R index 6785e23e..6db9a419 100644 --- a/R/konfound-glm-dichotomous.R +++ b/R/konfound-glm-dichotomous.R @@ -1,3 +1,23 @@ +#' Konfound Analysis for Generalized Linear Models with Dichotomous Outcomes +#' +#' This function performs konfound analysis on a generalized linear model +#' object with a dichotomous outcome. It uses 'broom' to tidy model outputs +#' and calculates the sensitivity of inferences. +#' +#' @param model_object The model object produced by glm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param to_return The type of output to return. +#' @param n_treat Number of treatment cases. +#' @param switch_trm Term to switch for sensitivity analysis. +#' @param replace Boolean indicating whether to replace cases or not. +#' @return The results of the konfound analysis. +#' @importFrom broom tidy glance +#' @importFrom stats glm + + # konfound-glm konfound_glm_dichotomous <- function(model_object, diff --git a/R/konfound-glm.R b/R/konfound-glm.R index c65cc32e..fbdcaac3 100644 --- a/R/konfound-glm.R +++ b/R/konfound-glm.R @@ -1,3 +1,24 @@ +#' Konfound Analysis for Generalized Linear Models +#' +#' This function performs konfound analysis on a generalized linear model +#' object. It uses 'broom' to tidy model outputs and calculates the sensitivity +#' of inferences. It supports analysis for a single variable +#' or multiple variables. 
+#' +#' @param model_object The model object produced by glm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified variable(s). +#' @importFrom broom tidy glance +#' @importFrom dplyr select filter bind_cols +#' @importFrom stats glm +#' @importFrom margins margins + + # konfound-glm konfound_glm <- function(model_object, diff --git a/R/konfound-lm.R b/R/konfound-lm.R index f0d9bd1b..31a4884e 100644 --- a/R/konfound-lm.R +++ b/R/konfound-lm.R @@ -1,3 +1,21 @@ +#' Konfound Analysis for Linear Models +#' +#' This function performs konfound analysis on a linear model object +#' produced by lm. +#' It calculates the sensitivity of inferences for coefficients in the model. +#' It supports analysis for a single variable or multiple variables. +#' +#' @param model_object The linear model object produced by lm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified variable(s). 
+#' @importFrom broom tidy glance +#' @importFrom dplyr select filter bind_cols + # konfound-lm konfound_lm <- function(model_object, diff --git a/R/konfound-lmer.R b/R/konfound-lmer.R index 63906eac..c1280281 100644 --- a/R/konfound-lmer.R +++ b/R/konfound-lmer.R @@ -1,5 +1,14 @@ + # # # konfound-lmer +#' Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model +#' +#' @param model_object The mixed-effects model object produced by lme4::lmer. +#' @return A vector containing degrees of freedom for the fixed effects in the model. +#' @importFrom lme4 fixef +#' @importFrom pbkrtest get_Lb_ddf +#' @importFrom purrr map_dbl + get_kr_df <- function(model_object) { L <- diag(rep(1, length(lme4::fixef(model_object)))) L <- as.data.frame(L) @@ -9,6 +18,24 @@ get_kr_df <- function(model_object) { out } + +#' Konfound Analysis for Linear Mixed-Effects Models +#' +#' This function performs konfound analysis on a linear mixed-effects model +#' object produced by lme4::lmer. It calculates the sensitivity of inferences +#' for fixed effects in the model. It supports analysis for a single variable or multiple variables. +#' +#' @param model_object The mixed-effects model object produced by lme4::lmer. +#' @param tested_variable_string The name of the fixed effect being tested. +#' @param test_all Boolean indicating whether to test all fixed effects or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified fixed effect(s). 
+#' @importFrom broom.mixed tidy +#' @importFrom dplyr filter bind_cols + konfound_lmer <- function(model_object, tested_variable_string, test_all, alpha, diff --git a/R/konfound.R b/R/konfound.R index 18fdce62..62fa74eb 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -1,23 +1,33 @@ -#' Perform sensitivity analysis on fitted models -#' @description For fitted models, this command calculates -#' (1) how much bias there must be in an estimate to invalidate/sustain -#' an inference; (2) the impact of an omitted variable necessary to -#' invalidate/sustain an inference for a regression coefficient. -#' Currently works for: models created with lm() (linear models). -#' @param model_object output from a model (currently works for: lm) -#' @param tested_variable Variable associated with the unstandardized beta -#' coefficient to be tested -#' @inheritParams pkonfound -#' @param index whether output is RIR or IT (impact threshold); -#' defaults to "RIR" -#' @param two_by_two whether or not the tested variable is a dichotomous -#' variable in a GLM; if so, the 2X2 table approach is used; only works for -#' single variables at present (so test_all = TRUE will return an error) -#' @param test_all whether to carry out the sensitivity test for all of the -#' coefficients (defaults to FALSE) -#' @return prints the bias and the number of cases that would have to be -#' replaced with cases for which there is no effect to invalidate the inference -#' @importFrom rlang .data +#' Konfound Analysis for Various Model Types +#' +#' Performs sensitivity analysis on fitted models including +#' linear models (`lm`), generalized linear models (`glm`), +#' and linear mixed-effects models (`lmerMod`). +#' It calculates the amount of bias required to invalidate or +#' sustain an inference,and the impact of an omitted variable +#' necessary to affect the inference. +#' +#' @param model_object A model object produced by `lm`, `glm`, or `lme4::lmer`. 
+#' @param tested_variable Variable associated with the coefficient to be tested. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return Type of output to return ('print', 'raw_output', 'table'). +#' @param test_all Boolean; if `TRUE`, tests all coefficients. +#' @param two_by_two Boolean; if `TRUE`, uses a 2x2 table approach +#' for `glm` dichotomous variables. +#' @param n_treat Number of treatment cases +#' (used only if `two_by_two` is `TRUE`). +#' @param switch_trm Boolean; switch treatment and control in the analysis. +#' @param replace Replacement method for treatment cases ('control' by default). +#' @return Depending on `to_return`, prints the result, returns a raw output, +#' or a summary table. +#' @importFrom rlang enquo quo_name +#' @importFrom lme4 fixef lmer +#' @importFrom broom tidy glance +#' @importFrom dplyr filter select bind_cols +#' @importFrom purrr map_dbl +#' @importFrom pbkrtest get_Lb_ddf #' @examples #' # using lm() for linear models #' m1 <- lm(mpg ~ wt + hp, data = mtcars) @@ -42,8 +52,7 @@ #' konfound(m3, Days) #' } #' -#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, -#' family = binomial(link = "logit")) +#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, family = binomial(link = "logit")) #' konfound(m4, condition, two_by_two = TRUE, n_treat = 55) #' diff --git a/R/mkonfound-data.R b/R/mkonfound-data.R index bf16f4f2..6e5dd3a1 100644 --- a/R/mkonfound-data.R +++ b/R/mkonfound-data.R @@ -11,6 +11,5 @@ #' \item{df}{degrees of freedom associated with the t value} #' ... 
#' } -#' @source \url -#' {https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} +#' @source \url{https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} "mkonfound_ex" diff --git a/R/mkonfound.R b/R/mkonfound.R index 5bed869e..08fa536c 100644 --- a/R/mkonfound.R +++ b/R/mkonfound.R @@ -1,27 +1,32 @@ -#' Perform meta-analyses including sensitivity analysis -#' @description For fitted models, this command carries out sensitivity analysis -#' for a number of models, when their parameters stored in a data.frame. -#' @param d data.frame or tibble with the t-statistics and -#' associated degrees of freedom -#' @param t t-statistic or vector of t-statistics -#' @param df degrees of freedom or vector of degrees of -#' freedom associated with the t-statistics in the t argument -#' @param return_plot whether to return a plot of the percent bias; -#' defaults to FALSE -#' @inheritParams pkonfound -#' @import rlang -#' @import dplyr -#' @return prints the bias and the number of cases that would have to be -#' replaced with cases for which there is no effect to invalidate the inference -#' for each of the cases in the data.frame +#' Meta-Analysis and Sensitivity Analysis for Multiple Studies +#' +#' Performs sensitivity analysis for multiple models, where parameters +#' are stored in a data frame. It calculates the amount of bias required to +#' invalidate or sustain an inference for each case in the data frame. +#' +#' @param d A data frame or tibble containing t-statistics and associated +#' degrees of freedom. +#' @param t Column name or vector of t-statistics. +#' @param df Column name or vector of degrees of freedom associated +#' with t-statistics. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param return_plot Whether to return a plot of the percent bias +#' (default is `FALSE`). 
+#' @return Depending on `return_plot`, either returns a data frame with +#' analysis results or a plot. +#' @importFrom rlang enquo +#' @importFrom dplyr select pull case_when +#' @importFrom purrr map2_dfr +#' @importFrom ggplot2 ggplot aes_string geom_histogram scale_fill_manual +#' theme_bw ggtitle facet_grid scale_y_continuous theme ylab xlab +#' @importFrom stats qt #' @examples #' \dontrun{ -#' mkonfound_ex -#' str(d) -#' mkonfound(mkonfound_ex, t, df) +#' data <- data.frame(t = c(2.1, 2.3, 1.9), df = c(30, 40, 35)) +#' mkonfound(data, t, df) #' } #' @export -#' mkonfound <- function(d, t, df, alpha = .05, tails = 2, return_plot = FALSE) { t_enquo <- enquo(t) diff --git a/R/nonlinear_auxiliary.R b/R/nonlinear_auxiliary.R index 9ba87925..f57dde34 100644 --- a/R/nonlinear_auxiliary.R +++ b/R/nonlinear_auxiliary.R @@ -1,3 +1,6 @@ +#' @importFrom stats qt + + # to evaluate whether we are moving cases to invalidate or sustain the inference isinvalidate <- function(thr_t, ob_t) { if ((0 < thr_t && thr_t < ob_t) || (ob_t < thr_t && thr_t < 0)) { @@ -106,6 +109,8 @@ get_abcd_kfnl <- function(a1, b1, c1, d1) { } # get the number of switches + + getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### calculate the est and se after rounding (before any switches) a <- table_bstart[1] @@ -332,6 +337,20 @@ get_pi <- function(odds_ratio, std_err, n_obs, n_trm) { return(x) } +#' Perform a Chi-Square Test +#' +#' @description +#' `chisq_p` calculates the p-value for a chi-square test given a contingency table. +#' +#' @param a Frequency count for row 1, column 1. +#' @param b Frequency count for row 1, column 2. +#' @param c Frequency count for row 2, column 1. +#' @param d Frequency count for row 2, column 2. +#' +#' @return P-value from the chi-square test. 
+#' @importFrom stats chisq.test + + # get p value for chi-square test chisq_p <- function(a, b, c, d){ table <- matrix(c(a,b,c,d), byrow = TRUE, 2, 2) diff --git a/R/pkonfound.R b/R/pkonfound.R index 19d699d5..2f297847 100644 --- a/R/pkonfound.R +++ b/R/pkonfound.R @@ -92,6 +92,8 @@ #' # use pkonfound to calculate rxcv and rycv when preserving standard error #' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, #' sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") +#' @importFrom dplyr pull +#' @importFrom tibble tribble #' @export pkonfound <- function(est_eff, diff --git a/R/test_cop.R b/R/test_cop.R index f07824f7..e1baf749 100644 --- a/R/test_cop.R +++ b/R/test_cop.R @@ -1,5 +1,30 @@ -# COP standards for Coefficient of Proportionality -# test_cop calculates both versions of COP (Oster's approx & exact) +#' Coefficient of Proportionality (COP) Test +#' +#' Conducts the Coefficient of Proportionality (COP) test, calculating both +#' Oster's approximate and exact versions of COP. +#' +#' @param est_eff The estimated effect (unstandardized). +#' @param std_err The standard error of the effect (unstandardized). +#' @param n_obs Number of observations. +#' @param n_covariates Number of covariates in the model. +#' @param sdx Standard deviation of the predictor variable. +#' @param sdy Standard deviation of the outcome variable. +#' @param R2 R-squared of the model, not adjusted. +#' @param eff_thr Threshold for the effect size, unstandardized. +#' @param FR2max_multiplier Multiplier for R2 to get R2max. +#' @param FR2max Maximum R-squared in the final model with an +#' unobserved confounder. +#' @param alpha Significance level for hypothesis testing (default: 0.05). +#' @param tails Number of tails for hypothesis testing (default: 2). +#' @param to_return Type of output to return ('raw_output', 'print', or other). 
+#' @return A list containing results of the COP test, including delta star, +#' delta exact,percentage bias, and other statistical measures. +#' Can also print summary results. +#' @importFrom stats qt +#' @importFrom ggplot2 ggplot aes geom_point geom_line scale_shape_manual +#' scale_y_continuous +#' @importFrom ggplot2 sec_axis theme element_blank element_line element_text +#' @export test_cop <- function(est_eff, # unstandardized std_err, # unstandardized diff --git a/R/test_sensitivity.R b/R/test_sensitivity.R index 35150591..7fa87a18 100644 --- a/R/test_sensitivity.R +++ b/R/test_sensitivity.R @@ -9,8 +9,11 @@ create_konfound_class <- function(x) { #' of data specified in the to_return argument #' @param object A `konfound` object #' @param ... Additional arguments +#' @method summary konfound #' @export + + summary.konfound <- function(object, ...) { cat("Created", length(object), "forms of output. To access type: \n") cat("\n") diff --git a/R/test_sensitivity_ln.R b/R/test_sensitivity_ln.R index 4d13bd05..e04e3297 100644 --- a/R/test_sensitivity_ln.R +++ b/R/test_sensitivity_ln.R @@ -1,3 +1,29 @@ +#' Test Sensitivity for Non-linear Models +#' +#' This function performs sensitivity analysis for non-linear models. +#' It is used in conjunction with `pkonfound()`, `konfound()`, and `mkonfound()`. +#' +#' @param est_eff Estimated effect size. +#' @param std_err Standard error of the estimated effect. +#' @param n_obs Number of observations in the study. +#' @param n_covariates Number of covariates in the model. +#' @param n_treat Number of cases in the treatment group. +#' @param switch_trm Logical value indicating whether to switch +#' treatment and control groups (default: TRUE). +#' @param replace Specifies the group for base rate calculation +#' ('control' or 'sample'). +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for hypothesis testing. +#' @param nu Hypothesized value to test the effect against. 
+#' @param to_return Type of output to return ('raw_output', 'print', or other). +#' @param model_object A model object used in the sensitivity analysis. +#' @param tested_variable Name of the variable being tested in the model. +#' @return Depending on `to_return`,a list of analysis results or printed output +#' @importFrom stats qt +#' @importFrom crayon bold underline +#' @importFrom purrr map +#' @export + # Main function to test sensitivity for non-linear models to # be wrapped with pkonfound(), konfound(), and mkonfound() test_sensitivity_ln <- function(est_eff, diff --git a/R/tkonfound.R b/R/tkonfound.R index 95bac426..2efef398 100644 --- a/R/tkonfound.R +++ b/R/tkonfound.R @@ -1,3 +1,32 @@ + +#' Perform Sensitivity Analysis on 2x2 Tables +#' +#' This function performs a sensitivity analysis on a 2x2 contingency table. +#' It calculates the number of cases that need to be replaced to invalidate +#' or sustain the statistical inference. The function also allows switching +#' between treatment success and failure or control success and failure +#' based on the provided parameters. +#' +#' @param a Number of unsuccessful cases in the control group. +#' @param b Number of successful cases in the control group. +#' @param c Number of unsuccessful cases in the treatment group. +#' @param d Number of successful cases in the treatment group. +#' @param alpha Significance level for the statistical test, default is 0.05. +#' @param switch_trm Boolean indicating whether to switch treatment row cells, +#' default is TRUE. +#' @param test Type of statistical test to use, either "fisher" +#' (default) or "chisq". +#' @param replace Indicates whether to use the entire sample or the control +#' group for base rate calculation, default is "control". +#' @param to_return Type of output to return, either "raw_output" or "print". 
+#' +#' @importFrom crayon bold underline +#' +#' @return Returns detailed information about the sensitivity analysis, +#' including the number of cases to be replaced (RIR), user-entered +#' table, transfer table, and conclusions. +#' +#' @export tkonfound <- function(a, b, c, d, alpha = 0.05, switch_trm = T, diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 4f7e5f65..49f76b2c 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -1,33 +1,36 @@ -#' Draw figures for change in effect size as a function of switching -#' or replacing outcomes -#' @description This function returns two plots for change in effect size -#' as a function of switching or replacing outcomes, one for all -#' possibilities (switching), another zoomed in the area for positive RIR -#' @param a cell is the number of cases in the control group showing -#' unsuccessful results -#' @param b cell is the number of cases in the control group showing -#' successful results -#' @param c cell is the number of cases in the treatment group showing -#' unsuccessful results -#' @param d cell is the number of cases in the treatment group showing -#' successful results -#' @param thr_p the p-value threshold used to evaluate statistical -#' significance, with the default of 0.05 -#' @param switch_trm whether switching the two cells in the treatment row -#' or the two cells in the control row, with the default of the treatment row -#' @param test whether using Fisher's Exact Test (default) or chi-square test -#' @param replace whether using entire sample or the control group to calculate -#' the base rate, with the default of the control group -#' @importFrom stats chisq.test -#' @return prints 2 figures for how number of hypothetical cases switched -#' changes the effect size +#' Draw Figures for Change in Effect Size in 2x2 Tables +#' +#' This function generates plots illustrating how the change in effect size +#' is influenced by switching or replacing outcomes in a 2x2 table. 
It produces +#' two plots: one showing all possibilities (switching) and another zoomed in +#' the area for positive RIR (Relative Impact Ratio). +#' +#' @param a Number of cases in the control group with unsuccessful outcomes. +#' @param b Number of cases in the control group with successful outcomes. +#' @param c Number of cases in the treatment group with unsuccessful outcomes. +#' @param d Number of cases in the treatment group with successful outcomes. +#' @param thr_p P-value threshold for statistical significance, default is 0.05. +#' @param switch_trm Whether to switch the two cells in the treatment or +#' control row, default is TRUE (treatment row). +#' @param test Type of statistical test used, either "Fisher's Exact Test" +#' (default) or "Chi-square test". +#' @param replace Indicates whether to use the entire sample or just the control +#' group for calculating the base rate, default is "control". +#' +#' @importFrom ggplot2 ggplot aes_string geom_line geom_point scale_fill_manual +#' scale_shape_manual geom_hline scale_y_continuous +#' scale_x_continuous theme element_blank element_line +#' @importFrom ggrepel geom_label_repel +#' +#' @return Returns two plots showing the effect of hypothetical case switches +#' on the effect size in a 2x2 table. 
+#' #' @examples #' # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, -#' test = "chisq") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, #' test = "chisq", replace = "entire") #' diff --git a/R/zzz.R b/R/zzz.R index 01d8f048..2de70e6f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,8 +1,18 @@ +#' Package Initialization Functions and Utilities +#' +#' These functions are used for initializing the package environment and +#' providing utility functions for the package. +#' +#' @name zzz +#' @aliases zzz +#' @rdname zzz +#' @importFrom utils globalVariables browseURL +#' ## quiets concerns (notes) of R CMD check re: the vars that are evaluated ## using non-standard evaluation if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "replace_null_cases", - "percent_bias", "val")) + "percent_bias", "val","ModelLabel", "coef_X")) .onAttach <- function(libname, pkgname) { packageStartupMessage("Sensitivity analysis as described in Frank, diff --git a/man/cal_delta_star.Rd b/man/cal_delta_star.Rd new file mode 100644 index 00000000..337ed0a1 --- /dev/null +++ b/man/cal_delta_star.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_delta_star} +\alias{cal_delta_star} +\title{Calculate delta star for sensitivity analysis} +\usage{ +cal_delta_star( + FR2max, + R2, + R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + est_uncond, + rxz, + n_obs +) +} +\arguments{ +\item{FR2max}{maximum R2} + +\item{R2}{current R2} + +\item{R2_uncond}{unconditional R2} + +\item{est_eff}{estimated effect} + +\item{eff_thr}{effect threshold} + +\item{var_x}{variance of X} + 
+\item{var_y}{variance of Y} + +\item{est_uncond}{unconditional estimate} + +\item{rxz}{correlation coefficient between X and Z} + +\item{n_obs}{number of observations} +} +\value{ +delta star value +} +\description{ +Calculate delta star for sensitivity analysis +} diff --git a/man/cal_rxy.Rd b/man/cal_rxy.Rd new file mode 100644 index 00000000..ddc5b756 --- /dev/null +++ b/man/cal_rxy.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_rxy} +\alias{cal_rxy} +\title{Calculate rxy based on ryxGz, rxz, and ryz} +\usage{ +cal_rxy(ryxGz, rxz, ryz) +} +\arguments{ +\item{ryxGz}{correlation coefficient between Y and X given Z} + +\item{rxz}{correlation coefficient between X and Z} + +\item{ryz}{correlation coefficient between Y and Z} +} +\value{ +rxy value +} +\description{ +Calculate rxy based on ryxGz, rxz, and ryz +} diff --git a/man/cal_rxz.Rd b/man/cal_rxz.Rd new file mode 100644 index 00000000..9cfd0bbe --- /dev/null +++ b/man/cal_rxz.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_rxz} +\alias{cal_rxz} +\title{Calculate R2xz based on variances and standard error} +\usage{ +cal_rxz(var_x, var_y, R2, df, std_err) +} +\arguments{ +\item{var_x}{variance of X} + +\item{var_y}{variance of Y} + +\item{R2}{coefficient of determination} + +\item{df}{degrees of freedom} + +\item{std_err}{standard error} +} +\value{ +R2xz value +} +\description{ +Calculate R2xz based on variances and standard error +} diff --git a/man/cal_ryz.Rd b/man/cal_ryz.Rd new file mode 100644 index 00000000..83988264 --- /dev/null +++ b/man/cal_ryz.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_ryz} +\alias{cal_ryz} +\title{Calculate R2yz based on ryxGz and R2} +\usage{ +cal_ryz(ryxGz, R2) +} +\arguments{ +\item{ryxGz}{correlation coefficient between Y and X 
given Z} + +\item{R2}{coefficient of determination} +} +\value{ +R2yz value +} +\description{ +Calculate R2yz based on ryxGz and R2 +} diff --git a/man/chisq_p.Rd b/man/chisq_p.Rd new file mode 100644 index 00000000..54a4b8c5 --- /dev/null +++ b/man/chisq_p.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_auxiliary.R +\name{chisq_p} +\alias{chisq_p} +\title{Perform a Chi-Square Test} +\usage{ +chisq_p(a, b, c, d) +} +\arguments{ +\item{a}{Frequency count for row 1, column 1.} + +\item{b}{Frequency count for row 1, column 2.} + +\item{c}{Frequency count for row 2, column 1.} + +\item{d}{Frequency count for row 2, column 2.} +} +\value{ +P-value from the chi-square test. +} +\description{ +`chisq_p` calculates the p-value for a chi-square test given a contingency table. +} diff --git a/man/concord1.Rd b/man/concord1.Rd index 678ecb58..cf3179b4 100644 --- a/man/concord1.Rd +++ b/man/concord1.Rd @@ -11,5 +11,6 @@ A data.frame with 496 rows and 10 variables. This data is from Hamilton (1983) } \references{ -Hamilton, Lawrence C. 1983. Saving water: A causal model of household conservation. Sociological Perspectives 26(4):355-374. +Hamilton, Lawrence C. 1983. Saving water: +A causal model of household conservation. Sociological Perspectives 26(4):355-374. } diff --git a/man/get_kr_df.Rd b/man/get_kr_df.Rd new file mode 100644 index 00000000..6241cc14 --- /dev/null +++ b/man/get_kr_df.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lmer.R +\name{get_kr_df} +\alias{get_kr_df} +\title{Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model} +\usage{ +get_kr_df(model_object) +} +\arguments{ +\item{model_object}{The mixed-effects model object produced by lme4::lmer.} +} +\value{ +A vector containing degrees of freedom for the fixed effects in the model. 
+} +\description{ +Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model +} diff --git a/man/konfound.Rd b/man/konfound.Rd index bd55f341..6e1edb7e 100644 --- a/man/konfound.Rd +++ b/man/konfound.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/konfound.R \name{konfound} \alias{konfound} -\title{Perform sensitivity analysis on fitted models} +\title{Konfound Analysis for Various Model Types} \usage{ konfound( model_object, @@ -19,33 +19,41 @@ konfound( ) } \arguments{ -\item{model_object}{output from a model (currently works for: lm)} +\item{model_object}{A model object produced by `lm`, `glm`, or `lme4::lmer`.} -\item{tested_variable}{Variable associated with the unstandardized beta coefficient to be tested} +\item{tested_variable}{Variable associated with the coefficient to be tested.} -\item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} +\item{alpha}{Significance level for hypothesis testing.} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{Number of tails for the test (1 or 2).} -\item{index}{whether output is RIR or IT (impact threshold); defaults to "RIR"} +\item{index}{Type of sensitivity analysis ('RIR' by default).} -\item{to_return}{whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return} +\item{to_return}{Type of output to return ('print', 'raw_output', 'table').} -\item{test_all}{whether to carry out the sensitivity test for all of the coefficients (defaults to FALSE)} +\item{test_all}{Boolean; if `TRUE`, tests all coefficients.} -\item{two_by_two}{whether or not the tested variable is a dichotomous variable in a GLM; if so, the 2X2 table approach is used; only works for single variables at present (so test_all = TRUE will return an error)} +\item{two_by_two}{Boolean; 
if `TRUE`, uses a 2x2 table approach
+for `glm` dichotomous variables.}
 
-\item{n_treat}{the number of cases associated with the treatment condition; applicable only when model_type = "logistic"}
+\item{n_treat}{Number of treatment cases
+(used only if `two_by_two` is `TRUE`).}
 
-\item{switch_trm}{whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic"}
+\item{switch_trm}{Boolean; switch treatment and control in the analysis.}
 
-\item{replace}{whether using entire sample or the control group to calculate the base rate; default is the entire sample}
+\item{replace}{Replacement method for treatment cases ('control' by default).}
 }
 \value{
-prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference
+Depending on `to_return`, prints the result, returns a raw output,
+or a summary table.
 }
 \description{
-For fitted models, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. Currently works for: models created with lm() (linear models).
+Performs sensitivity analysis on fitted models including
+linear models (`lm`), generalized linear models (`glm`),
+and linear mixed-effects models (`lmerMod`).
+It calculates the amount of bias required to invalidate or
+sustain an inference, and the impact of an omitted variable
+necessary to affect the inference. 
} \examples{ # using lm() for linear models diff --git a/man/konfound_glm.Rd b/man/konfound_glm.Rd new file mode 100644 index 00000000..cd11b0af --- /dev/null +++ b/man/konfound_glm.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-glm.R +\name{konfound_glm} +\alias{konfound_glm} +\title{Konfound Analysis for Generalized Linear Models} +\usage{ +konfound_glm( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index = "RIR", + to_return +) +} +\arguments{ +\item{model_object}{The model object produced by glm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified variable(s). +} +\description{ +This function performs konfound analysis on a generalized linear model +object. It uses 'broom' to tidy model outputs and calculates the sensitivity +of inferences. It supports analysis for a single variable + or multiple variables. 
+} diff --git a/man/konfound_glm_dichotomous.Rd b/man/konfound_glm_dichotomous.Rd new file mode 100644 index 00000000..a3f78169 --- /dev/null +++ b/man/konfound_glm_dichotomous.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-glm-dichotomous.R +\name{konfound_glm_dichotomous} +\alias{konfound_glm_dichotomous} +\title{Konfound Analysis for Generalized Linear Models with Dichotomous Outcomes} +\usage{ +konfound_glm_dichotomous( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + to_return, + n_treat, + switch_trm, + replace +) +} +\arguments{ +\item{model_object}{The model object produced by glm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{to_return}{The type of output to return.} + +\item{n_treat}{Number of treatment cases.} + +\item{switch_trm}{Term to switch for sensitivity analysis.} + +\item{replace}{Boolean indicating whether to replace cases or not.} +} +\value{ +The results of the konfound analysis. +} +\description{ +This function performs konfound analysis on a generalized linear model +object with a dichotomous outcome. It uses 'broom' to tidy model outputs +and calculates the sensitivity of inferences. 
+} diff --git a/man/konfound_lm.Rd b/man/konfound_lm.Rd new file mode 100644 index 00000000..2959dd83 --- /dev/null +++ b/man/konfound_lm.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lm.R +\name{konfound_lm} +\alias{konfound_lm} +\title{Konfound Analysis for Linear Models} +\usage{ +konfound_lm( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index, + to_return +) +} +\arguments{ +\item{model_object}{The linear model object produced by lm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified variable(s). +} +\description{ +This function performs konfound analysis on a linear model object +produced by lm. +It calculates the sensitivity of inferences for coefficients in the model. +It supports analysis for a single variable or multiple variables. 
+} diff --git a/man/konfound_lmer.Rd b/man/konfound_lmer.Rd new file mode 100644 index 00000000..7a9bacc8 --- /dev/null +++ b/man/konfound_lmer.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lmer.R +\name{konfound_lmer} +\alias{konfound_lmer} +\title{Konfound Analysis for Linear Mixed-Effects Models} +\usage{ +konfound_lmer( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index, + to_return +) +} +\arguments{ +\item{model_object}{The mixed-effects model object produced by lme4::lmer.} + +\item{tested_variable_string}{The name of the fixed effect being tested.} + +\item{test_all}{Boolean indicating whether to test all fixed effects or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified fixed effect(s). +} +\description{ +This function performs konfound analysis on a linear mixed-effects model +object produced by lme4::lmer. It calculates the sensitivity of inferences +for fixed effects in the model. It supports analysis for a single variable or multiple variables. 
+} diff --git a/man/mkonfound.Rd b/man/mkonfound.Rd index 57aa8e72..3c02b240 100644 --- a/man/mkonfound.Rd +++ b/man/mkonfound.Rd @@ -2,33 +2,38 @@ % Please edit documentation in R/mkonfound.R \name{mkonfound} \alias{mkonfound} -\title{Perform meta-analyses including sensitivity analysis} +\title{Meta-Analysis and Sensitivity Analysis for Multiple Studies} \usage{ mkonfound(d, t, df, alpha = 0.05, tails = 2, return_plot = FALSE) } \arguments{ -\item{d}{data.frame or tibble with the t-statistics and associated degrees of freedom} +\item{d}{A data frame or tibble containing t-statistics and associated +degrees of freedom.} -\item{t}{t-statistic or vector of t-statistics} +\item{t}{Column name or vector of t-statistics.} -\item{df}{degrees of freedom or vector of degrees of freedom associated with the t-statistics in the t argument} +\item{df}{Column name or vector of degrees of freedom associated +with t-statistics.} -\item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} +\item{alpha}{Significance level for hypothesis testing.} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{Number of tails for the test (1 or 2).} -\item{return_plot}{whether to return a plot of the percent bias; defaults to FALSE} +\item{return_plot}{Whether to return a plot of the percent bias +(default is `FALSE`).} } \value{ -prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference for each of the cases in the data.frame +Depending on `return_plot`, either returns a data frame with +analysis results or a plot. } \description{ -For fitted models, this command carries out sensitivity analysis for a number of models, when their parameters stored in a data.frame. +Performs sensitivity analysis for multiple models, where parameters +are stored in a data frame. 
It calculates the amount of bias required to +invalidate or sustain an inference for each case in the data frame. } \examples{ \dontrun{ -mkonfound_ex -str(d) -mkonfound(mkonfound_ex, t, df) +data <- data.frame(t = c(2.1, 2.3, 1.9), df = c(30, 40, 35)) +mkonfound(data, t, df) } } diff --git a/man/mkonfound_ex.Rd b/man/mkonfound_ex.Rd index 54387103..c2d72455 100644 --- a/man/mkonfound_ex.Rd +++ b/man/mkonfound_ex.Rd @@ -19,7 +19,9 @@ A data frame with 30 rows and 2 variables: mkonfound_ex } \description{ -A dataset containing t and df values from example studies from Educational Evaluation -and Policy Analysis (as detailed in Frank et al., 2013): https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view +A dataset containing t and df values from example studies +from Educational Evaluation +and Policy Analysis (as detailed in Frank et al., 2013): + https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view } \keyword{datasets} diff --git a/man/output_df.Rd b/man/output_df.Rd new file mode 100644 index 00000000..54ea2527 --- /dev/null +++ b/man/output_df.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_output_dataframe.R +\name{output_df} +\alias{output_df} +\title{Output data frame based on model estimates and thresholds} +\usage{ +output_df( + est_eff, + beta_threshhold, + unstd_beta, + bias = NULL, + sustain = NULL, + recase, + obs_r, + critical_r, + r_con, + itcv, + non_linear +) +} +\arguments{ +\item{est_eff}{estimated effect} + +\item{beta_threshhold}{threshold for beta} + +\item{unstd_beta}{unstandardized beta value} + +\item{bias}{bias to change inference} + +\item{sustain}{sustain to change inference} + +\item{recase}{number of cases to replace null} + +\item{obs_r}{observed correlation} + +\item{critical_r}{critical correlation} + +\item{r_con}{correlation for omitted variable} + +\item{itcv}{inferential threshold for confounding variable} + +\item{non_linear}{flag for 
non-linear models} +} +\value{ +data frame with model information +} +\description{ +Output data frame based on model estimates and thresholds +} diff --git a/man/output_print.Rd b/man/output_print.Rd new file mode 100644 index 00000000..c69cd741 --- /dev/null +++ b/man/output_print.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_output_print.R +\name{output_print} +\alias{output_print} +\title{Output printed text with formatting} +\usage{ +output_print( + eff_diff, + beta_threshhold, + bias = NULL, + sustain = NULL, + nu, + recase, + obs_r, + critical_r, + r_con, + itcv, + alpha, + index +) +} +\arguments{ +\item{eff_diff}{The difference in the effect size being evaluated.} + +\item{beta_threshhold}{The threshold value of beta, used for +statistical significance determination.} + +\item{bias}{The percentage of the estimate that could be due to bias (optional).} + +\item{sustain}{The percentage of the estimate necessary to sustain an inference (optional).} + +\item{nu}{The hypothesized effect size used in replacement analysis.} + +\item{recase}{The number of cases that need to be replaced to change the inference.} + +\item{obs_r}{The observed correlation coefficient in the data.} + +\item{critical_r}{The critical correlation coefficient for statistical significance.} + +\item{r_con}{The correlation coefficient of an omitted variable with both the outcome and the predictor.} + +\item{itcv}{The impact threshold for a confounding variable.} + +\item{alpha}{The level of statistical significance.} + +\item{index}{A character string indicating the index for which the output is generated ('RIR' or 'IT').} +} +\description{ +This function outputs printed text for various indices such as RIR +(Robustness of Inference to Replacement) +and IT (Impact Threshold for a Confounding Variable) with specific formatting + like bold, underline, and italic +using functions from the crayon package. 
It handles different scenarios based
+ on the effect difference,
+beta threshold, and other parameters, providing formatted
+output for each case.
+}
diff --git a/man/output_table.Rd b/man/output_table.Rd
new file mode 100644
index 00000000..886adfb0
--- /dev/null
+++ b/man/output_table.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helper_output_table.R
+\name{output_table}
+\alias{output_table}
+\title{Output a Tidy Table from a Model Object}
+\usage{
+output_table(model_object, tested_variable)
+}
+\arguments{
+\item{model_object}{A model object from which to generate the output.}
+
+\item{tested_variable}{The variable being tested in the model.}
+}
+\value{
+A tidy data frame containing model outputs, ITCV,
+and impacts for covariates.
+}
+\description{
+This function takes a model object and the tested variable,
+tidies the model output using `broom::tidy`,
+calculates the impact threshold for confounding variables (ITCV) and impact
+for each covariate, and returns a rounded, tidy table of model outputs. 
+} diff --git a/man/pkonfound.Rd b/man/pkonfound.Rd index 37a41f8d..9936a6fd 100644 --- a/man/pkonfound.Rd +++ b/man/pkonfound.Rd @@ -33,9 +33,11 @@ pkonfound( ) } \arguments{ -\item{est_eff}{the estimated effect (such as an unstandardized beta coefficient or a group mean difference)} +\item{est_eff}{the estimated effect (such as an unstandardized beta +coefficient or a group mean difference)} -\item{std_err}{the standard error of the estimate of the unstandardized regression coefficient} +\item{std_err}{the standard error of the estimate of the unstandardized +regression coefficient} \item{n_obs}{the number of observations in the sample} @@ -43,31 +45,45 @@ pkonfound( \item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{integer whether hypothesis testing is one-tailed (1) or +two-tailed (2; defaults to 2)} -\item{index}{whether output is RIR or IT (impact threshold); defaults to "RIR"} +\item{index}{whether output is RIR or IT (impact threshold); +defaults to "RIR"} -\item{nu}{what hypothesis to be tested; defaults to testing whether est_eff is significantly different from 0} +\item{nu}{what hypothesis to be tested; defaults to testing whether +est_eff is significantly different from 0} -\item{n_treat}{the number of cases associated with the treatment condition; applicable only when model_type = "logistic"} +\item{n_treat}{the number of cases associated with the treatment condition; +applicable only when model_type = "logistic"} -\item{switch_trm}{whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic"} +\item{switch_trm}{whether to switch the treatment and control cases; defaults +to FALSE; applicable only when model_type = "logistic"} -\item{model_type}{the type of model being estimated; defaults to "ols" for a linear regression model; the other option is "logistic"} 
+\item{model_type}{the type of model being estimated; defaults to "ols" for +a linear regression model; the other option is "logistic"} -\item{a}{cell is the number of cases in the control group showing unsuccessful results} +\item{a}{cell is the number of cases in the control group showing +unsuccessful results} -\item{b}{cell is the number of cases in the control group showing successful results} +\item{b}{cell is the number of cases in the control group showing successful +results} -\item{c}{cell is the number of cases in the treatment group showing unsuccessful results} +\item{c}{cell is the number of cases in the treatment group showing +unsuccessful results} -\item{d}{cell is the number of cases in the treatment group showing successful results} +\item{d}{cell is the number of cases in the treatment group showing +successful results} -\item{two_by_two_table}{table that is a matrix or can be coerced to one (data.frame, tibble, tribble) from which the a, b, c, and d arguments can be extracted} +\item{two_by_two_table}{table that is a matrix or can be coerced to one +(data.frame, tibble, tribble) from which the a, b, c, and d arguments can +be extracted} -\item{test}{whether using Fisher's Exact Test or A chi-square test; defaults to Fisher's Exact Test} +\item{test}{whether using Fisher's Exact Test or A chi-square test; defaults +to Fisher's Exact Test} -\item{replace}{whether using entire sample or the control group to calculate the base rate; default is the entire sample} +\item{replace}{whether using entire sample or the control group to calculate +the base rate; default is the control group} \item{sdx}{the standard deviation of X} @@ -77,17 +93,26 @@ pkonfound( \item{eff_thr}{unstandardized coefficient threshold to change an inference} -\item{FR2max}{the largest R2, or R2max, in the final model with unobserved confounder} +\item{FR2max}{the largest R2, or R2max, in the final model with unobserved +confounder} -\item{FR2max_multiplier}{the multiplier of R2 
to get R2max, default is set to 1.3} +\item{FR2max_multiplier}{the multiplier of R2 to get R2max, +default is set to 1.3} -\item{to_return}{whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return} +\item{to_return}{whether to return a data.frame +(by specifying this argument to equal "raw_output" for use in other analyses) + or a plot ("plot"); default is to print ("print") the output to the console; + can specify a vector of output to return} } \value{ -prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +prints the bias and the number of cases that would have to be +replaced with cases for which there is no effect to invalidate the inference } \description{ -For published studies, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. +For published studies, this command calculates +(1) how much bias there must be in an estimate to invalidate/sustain +an inference; (2) the impact of an omitted variable necessary to +invalidate/sustain an inference for a regression coefficient. 
}
 \examples{
 # using pkonfound for linear models
@@ -114,5 +139,9 @@ pkonfound(a = 35, b = 17, c = 17, d = 38, alpha = 0.01, switch_trm = FALSE)
 pkonfound(a = 35, b = 17, c = 17, d = 38, test = "chisq")
 
 # use pkonfound to calculate delta* and delta_exact
-
+pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7,
+ eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output")
+# use pkonfound to calculate rxcv and rycv when preserving standard error
+pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1,
+sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output")
 }
diff --git a/man/plot_correlation.Rd b/man/plot_correlation.Rd
new file mode 100644
index 00000000..960cd99d
--- /dev/null
+++ b/man/plot_correlation.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/helper_plot_correlation.R
+\name{plot_correlation}
+\alias{plot_correlation}
+\title{Plot Correlation Diagram}
+\usage{
+plot_correlation(r_con, obs_r, critical_r)
+}
+\arguments{
+\item{r_con}{Correlation coefficient related to the confounding variable.}
+
+\item{obs_r}{Observed correlation coefficient.}
+
+\item{critical_r}{Critical correlation coefficient for decision-making.}
+}
+\value{
+A ggplot object representing the correlation diagram.
+}
+\description{
+This function creates a plot to illustrate the correlation between different
+variables, specifically focusing on the confounding variable, predictor of
+interest, and outcome. It uses ggplot2 for graphical representation. 
+} diff --git a/man/plot_threshold.Rd b/man/plot_threshold.Rd new file mode 100644 index 00000000..6407c71e --- /dev/null +++ b/man/plot_threshold.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_plot_threshold.R +\name{plot_threshold} +\alias{plot_threshold} +\title{Plot Effect Threshold Diagram} +\usage{ +plot_threshold(beta_threshold, est_eff) +} +\arguments{ +\item{beta_threshold}{The threshold value for the effect.} + +\item{est_eff}{The estimated effect size.} +} +\value{ +A ggplot object representing the effect threshold diagram. +} +\description{ +This function creates a plot to illustrate the threshold of an effect +estimate in relation to a specified beta threshold. It uses ggplot2 +for graphical representation. +} diff --git a/man/summary.konfound.Rd b/man/summary.konfound.Rd index b1a4b7d2..0e66b35b 100644 --- a/man/summary.konfound.Rd +++ b/man/summary.konfound.Rd @@ -15,5 +15,6 @@ Concise summary of konfound output } \details{ -Prints a concise summary of konfound output with multiple types of data specified in the to_return argument +Prints a concise summary of konfound output with multiple types + of data specified in the to_return argument } diff --git a/man/test_cop.Rd b/man/test_cop.Rd new file mode 100644 index 00000000..314fb1f7 --- /dev/null +++ b/man/test_cop.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test_cop.R +\name{test_cop} +\alias{test_cop} +\title{Coefficient of Proportionality (COP) Test} +\usage{ +test_cop( + est_eff, + std_err, + n_obs, + n_covariates, + sdx, + sdy, + R2, + eff_thr = 0, + FR2max_multiplier = 1.3, + FR2max = 0, + alpha = 0.05, + tails = 2, + to_return = to_return +) +} +\arguments{ +\item{est_eff}{The estimated effect (unstandardized).} + +\item{std_err}{The standard error of the effect (unstandardized).} + +\item{n_obs}{Number of observations.} + +\item{n_covariates}{Number of covariates in the model.} 
+ +\item{sdx}{Standard deviation of the predictor variable.} + +\item{sdy}{Standard deviation of the outcome variable.} + +\item{R2}{R-squared of the model, not adjusted.} + +\item{eff_thr}{Threshold for the effect size, unstandardized.} + +\item{FR2max_multiplier}{Multiplier for R2 to get R2max.} + +\item{FR2max}{Maximum R-squared in the final model with an +unobserved confounder.} + +\item{alpha}{Significance level for hypothesis testing (default: 0.05).} + +\item{tails}{Number of tails for hypothesis testing (default: 2).} + +\item{to_return}{Type of output to return ('raw_output', 'print', or other).} +} +\value{ +A list containing results of the COP test, including delta star, + delta exact,percentage bias, and other statistical measures. + Can also print summary results. +} +\description{ +Conducts the Coefficient of Proportionality (COP) test, calculating both +Oster's approximate and exact versions of COP. +} diff --git a/man/test_sensitivity_ln.Rd b/man/test_sensitivity_ln.Rd new file mode 100644 index 00000000..c34865c2 --- /dev/null +++ b/man/test_sensitivity_ln.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test_sensitivity_ln.R +\name{test_sensitivity_ln} +\alias{test_sensitivity_ln} +\title{Test Sensitivity for Non-linear Models} +\usage{ +test_sensitivity_ln( + est_eff, + std_err, + n_obs, + n_covariates, + n_treat, + switch_trm = T, + replace = "control", + alpha, + tails, + nu, + to_return, + model_object, + tested_variable +) +} +\arguments{ +\item{est_eff}{Estimated effect size.} + +\item{std_err}{Standard error of the estimated effect.} + +\item{n_obs}{Number of observations in the study.} + +\item{n_covariates}{Number of covariates in the model.} + +\item{n_treat}{Number of cases in the treatment group.} + +\item{switch_trm}{Logical value indicating whether to switch +treatment and control groups (default: TRUE).} + +\item{replace}{Specifies the group for base rate calculation +('control' or 
'sample').}
+
+\item{alpha}{Significance level for hypothesis testing.}
+
+\item{tails}{Number of tails for hypothesis testing.}
+
+\item{nu}{Hypothesized value to test the effect against.}
+
+\item{to_return}{Type of output to return ('raw_output', 'print', or other).}
+
+\item{model_object}{A model object used in the sensitivity analysis.}
+
+\item{tested_variable}{Name of the variable being tested in the model.}
+}
+\value{
+Depending on `to_return`, a list of analysis results or printed output
+}
+\description{
+This function performs sensitivity analysis for non-linear models.
+It is used in conjunction with `pkonfound()`, `konfound()`, and `mkonfound()`.
+}
diff --git a/man/tkonfound.Rd b/man/tkonfound.Rd
new file mode 100644
index 00000000..b3297ce2
--- /dev/null
+++ b/man/tkonfound.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tkonfound.R
+\name{tkonfound}
+\alias{tkonfound}
+\title{Perform Sensitivity Analysis on 2x2 Tables}
+\usage{
+tkonfound(
+  a,
+  b,
+  c,
+  d,
+  alpha = 0.05,
+  switch_trm = T,
+  test = "fisher",
+  replace = "control",
+  to_return = to_return
+)
+}
+\arguments{
+\item{a}{Number of unsuccessful cases in the control group.}
+
+\item{b}{Number of successful cases in the control group.}
+
+\item{c}{Number of unsuccessful cases in the treatment group.}
+
+\item{d}{Number of successful cases in the treatment group.}
+
+\item{alpha}{Significance level for the statistical test, default is 0.05.}
+
+\item{switch_trm}{Boolean indicating whether to switch treatment row cells,
+default is TRUE.}
+
+\item{test}{Type of statistical test to use, either "fisher"
+(default) or "chisq".}
+
+\item{replace}{Indicates whether to use the entire sample or the control
+group for base rate calculation, default is "control".}
+
+\item{to_return}{Type of output to return, either "raw_output" or "print".}
+}
+\value{
+Returns detailed information about the sensitivity analysis,
+ including the number of cases 
to be replaced (RIR), user-entered + table, transfer table, and conclusions. +} +\description{ +This function performs a sensitivity analysis on a 2x2 contingency table. +It calculates the number of cases that need to be replaced to invalidate +or sustain the statistical inference. The function also allows switching +between treatment success and failure or control success and failure +based on the provided parameters. +} diff --git a/man/tkonfound_fig.Rd b/man/tkonfound_fig.Rd index 4a742aac..6c2c8783 100644 --- a/man/tkonfound_fig.Rd +++ b/man/tkonfound_fig.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tkonfound_fig.R \name{tkonfound_fig} \alias{tkonfound_fig} -\title{Draw figures for change in effect size as a function of switching or replacing outcomes} +\title{Draw Figures for Change in Effect Size in 2x2 Tables} \usage{ tkonfound_fig( a, @@ -16,27 +16,34 @@ tkonfound_fig( ) } \arguments{ -\item{a}{cell is the number of cases in the control group showing unsuccessful results} +\item{a}{Number of cases in the control group with unsuccessful outcomes.} -\item{b}{cell is the number of cases in the control group showing successful results} +\item{b}{Number of cases in the control group with successful outcomes.} -\item{c}{cell is the number of cases in the treatment group showing unsuccessful results} +\item{c}{Number of cases in the treatment group with unsuccessful outcomes.} -\item{d}{cell is the number of cases in the treatment group showing successful results} +\item{d}{Number of cases in the treatment group with successful outcomes.} -\item{thr_p}{the p-value threshold used to evaluate statistical significance, with the default of 0.05} +\item{thr_p}{P-value threshold for statistical significance, default is 0.05.} -\item{switch_trm}{whether switching the two cells in the treatment row or the two cells in the control row, with the default of the treatment row} +\item{switch_trm}{Whether to switch the two cells in the treatment or +control row, default 
is TRUE (treatment row).}
 
-\item{test}{whether using Fisher's Exact Test (default) or chi-square test}
+\item{test}{Type of statistical test used, either "Fisher's Exact Test"
+(default) or "Chi-square test".}
 
-\item{replace}{whether using entire sample or the control group to calculate the base rate, with the default of the entire sample}
+\item{replace}{Indicates whether to use the entire sample or just the control
+group for calculating the base rate, default is "control".}
 }
 \value{
-prints 2 figures for how number of hypothetical cases switched changes the effect size
+Returns two plots showing the effect of hypothetical case switches
+ on the effect size in a 2x2 table.
 }
 \description{
-This function returns two plots for change in effect size as a function of switching or replacing outcomes, one for all possibilities (switching), another zoomed in the area for positive RIR
+This function generates plots illustrating how the change in effect size
+is influenced by switching or replacing outcomes in a 2x2 table. It produces
+two plots: one showing all possibilities (switching) and another zoomed in on
+the area for positive RIR (Robustness of Inference to Replacement).
} \examples{ # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) @@ -44,6 +51,7 @@ tkonfound_fig(35, 17, 17, 38) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") -tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq", replace = "control") +tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +test = "chisq", replace = "entire") } diff --git a/man/verify_reg_Gz.Rd b/man/verify_reg_Gz.Rd new file mode 100644 index 00000000..5b95f9f3 --- /dev/null +++ b/man/verify_reg_Gz.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{verify_reg_Gz} +\alias{verify_reg_Gz} +\title{Verify regression model with control variable Z} +\usage{ +verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy, rxz, rzy) +} +\arguments{ +\item{n_obs}{number of observations} + +\item{sdx}{standard deviation of X} + +\item{sdy}{standard deviation of Y} + +\item{sdz}{standard deviation of Z} + +\item{rxy}{correlation coefficient between X and Y} + +\item{rxz}{correlation coefficient between X and Z} + +\item{rzy}{correlation coefficient between Z and Y} +} +\value{ +list of model parameters +} +\description{ +Verify regression model with control variable Z +} diff --git a/man/verify_reg_uncond.Rd b/man/verify_reg_uncond.Rd new file mode 100644 index 00000000..b860d355 --- /dev/null +++ b/man/verify_reg_uncond.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{verify_reg_uncond} +\alias{verify_reg_uncond} +\title{Verify unconditional regression model} +\usage{ +verify_reg_uncond(n_obs, sdx, sdy, rxy) +} +\arguments{ +\item{n_obs}{number of observations} + +\item{sdx}{standard deviation of X} + +\item{sdy}{standard deviation of Y} + +\item{rxy}{correlation coefficient between X and 
Y} +} +\value{ +list of model parameters +} +\description{ +Verify unconditional regression model +} diff --git a/man/zzz.Rd b/man/zzz.Rd new file mode 100644 index 00000000..7de2a125 --- /dev/null +++ b/man/zzz.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\name{zzz} +\alias{zzz} +\title{Package Initialization Functions and Utilities} +\description{ +These functions are used for initializing the package environment and +providing utility functions for the package. +}