From d32fe06eb92c59318fee94507346d93553c53bcc Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 20:02:38 -0500 Subject: [PATCH 01/26] Update helper_output_dataframe.R Linefeed the parameter "output_df <- function" to ensure that the line of code does not exceed 80 characters. --- R/helper_output_dataframe.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/helper_output_dataframe.R b/R/helper_output_dataframe.R index 26518a56..8f6cdf40 100644 --- a/R/helper_output_dataframe.R +++ b/R/helper_output_dataframe.R @@ -1,6 +1,15 @@ # Function to output the data frame -output_df <- function(est_eff, beta_threshhold, unstd_beta, bias = NULL, sustain = NULL, recase, obs_r, critical_r, r_con, itcv, non_linear) { +output_df <- function(est_eff, + beta_threshhold, + unstd_beta, + bias = NULL, + sustain = NULL, + recase, obs_r, + critical_r, + r_con, + itcv, + non_linear) { if (abs(est_eff) > abs(beta_threshhold)) { df <- dplyr::tibble( action = "to_invalidate", From 3cbab8b513f3a45e95ca1b63fb1eb9cc6f3f5351 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:30:03 -0500 Subject: [PATCH 02/26] Update helper_output_print.R Manual modification: no more than 80 characters per line of code --- R/helper_output_print.R | 130 ++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 38 deletions(-) diff --git a/R/helper_output_print.R b/R/helper_output_print.R index 09b32dcb..e2f944c8 100644 --- a/R/helper_output_print.R +++ b/R/helper_output_print.R @@ -1,24 +1,47 @@ # Function to output printed text -output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) { - if (index == "RIR"){ - cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) - if (abs(eff_diff) > abs(beta_threshhold)) { - cat("To invalidate an inference, ", round(bias, 3), "% of the estimate would have to be due to bias. ") - cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") - cat("\n") - cat("To invalidate an inference, ", round(recase, 3), " observations would have to be replaced with cases") - cat("\n") - cat("for which the effect is ", nu, " (RIR = ", round(recase, 3), ").\n", sep = "") - cat("\n") +output_print <- function(eff_diff, + beta_threshhold, + bias = NULL, + sustain = NULL, + nu, + recase, + obs_r, + critical_r, + r_con, + itcv, + alpha, + index) { + if (index == "RIR") { + cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) + if (abs(eff_diff) > abs(beta_threshhold)) { + cat("To invalidate an inference, ", round(bias, 3), + "% of the estimate would have to be due to bias. ") + cat("\n") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") + cat("\n") + cat("To invalidate an inference, ", round(recase, 3), + " observations would have to be replaced with cases") + cat("\n") + cat("for which the effect is ", nu, + " (RIR = ", round(recase, 3), ").\n", sep = "") + cat("\n") } else if (abs(eff_diff) < abs(beta_threshhold)) { - cat("To sustain an inference, ", round(sustain, 3), "% of the estimate would have to be due to bias. ") + cat("To sustain an inference, ", round(sustain, 3), + "% of the estimate would have to be due to bias. ") cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") cat("\n") - cat("To sustain an inference, ", round(recase, 3), " of the cases with ", nu, " effect would have to be replaced with cases at the threshold of inference"," (RIR = ", round(recase, 3), ").\n", sep = "") + cat("To sustain an inference, ", round(recase, 3), + " of the cases with ", nu, + " effect would have to be replaced with cases at the + threshold of inference", + " (RIR = ", round(recase, 3), ").\n", sep = "") } else if (eff_diff == beta_threshhold) { warning("The coefficient is exactly equal to the threshold.\n") @@ -26,65 +49,94 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("See Frank et al. (2013) for a description of the method.") cat("\n") cat("\n") - cat(crayon::underline("Citation:"), "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. (2013).") + cat(crayon::underline("Citation:"), + "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. (2013).") cat("\n") cat("What would it take to change an inference?") cat("\n") - cat("Using Rubin's causal model to interpret the robustness of causal inferences.") + cat("Using Rubin's causal model to interpret the + robustness of causal inferences.") cat("\n") - cat(crayon::italic("Education, Evaluation and Policy Analysis, 35"), "437-460.") + cat(crayon::italic("Education, Evaluation and + Policy Analysis, 35"), "437-460.") cat("\n") } if (index == "IT") { cat(crayon::bold("Impact Threshold for a Confounding Variable:\n")) if (abs(obs_r) > abs(critical_r) & obs_r > 0) { - cat("The minimum impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The minimum impact of an omitted variable to invalidate an + inference for a null hypothesis of 0 effect is based on + a correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ") + " with the predictor of interest + (conditioning on observed covariates) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", - sep = "") + cat(round(critical_r, 3), " for statistical + significance (alpha = ", alpha, ").\n",sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to invalidate an inference for a null hypothesis of 0 effect. + \n", sep = "") } else if (abs(obs_r) > abs(critical_r) & obs_r < 0) { - cat("The minimum (in absolute value) impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The minimum (in absolute value) impact of an omitted + variable to invalidate an inference for a null hypothesis of 0 + effect is based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ") + " with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", + cat(round(critical_r, 3), " for statistical significance + (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to invalidate an inference for a null hypothesis + of 0 effect.\n", sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r > 0) { - cat("The maximum impact (in absolute value) of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The maximum impact (in absolute value) of an omitted variable + to sustain an inference for a null hypothesis of 0 effect is + based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ", round(beta_threshhold, 3)) + " with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ", + round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r < 0) { - cat("The maximum impact of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The maximum impact of an omitted variable to sustain an + inference for a null hypothesis of 0 effect is based on a + correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ", round(beta_threshhold, 3)) + " with the predictor of interest (conditioning on observed + covariates) based on a threshold of ", round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (obs_r == critical_r) { warning("The correlation is exactly equal to the threshold.\n") } @@ -95,7 +147,9 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("\n") cat("Frank, K. (2000). Impact of a confounding variable on the") cat("\n") - cat("inference of a regression coefficient.", crayon::italic("Sociological Methods and Research, 29"), "(2), 147-194") + cat("inference of a regression coefficient.", + crayon::italic("Sociological Methods and Research, 29"), + "(2), 147-194") cat("\n") } From 4fc720814d4c3bbe3c317088310cc112f20ca023 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Sun, 19 Nov 2023 21:46:02 -0500 Subject: [PATCH 03/26] Update helper_output_table.R Manual modification: no more than 80 characters per line of code --- R/helper_output_table.R | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/R/helper_output_table.R b/R/helper_output_table.R index c3211f5a..254a7521 100644 --- a/R/helper_output_table.R +++ b/R/helper_output_table.R @@ -5,18 +5,34 @@ output_table <- function(model_object, tested_variable) { cat("Dependent variable is", p, "\n") model_output <- broom::tidy(model_object) # tidying output var_row <- model_output$term == tested_variable - model_output$itcv[var_row] <- abs(konfound(model_object, !!tested_variable, to_return = "raw_output")$itcv) + model_output$itcv[var_row] <- abs(konfound( + model_object, + !!tested_variable, to_return = "raw_output" + )$itcv) - covariate_names <- model_output$term[!(model_output$term %in% c("(Intercept)", tested_variable))] + covariate_names <- model_output$term[!( + model_output$term %in% c("(Intercept)", tested_variable) +)] - for (i in seq(covariate_names)) { - cov_row <- model_output$term == covariate_names[i] - d <- model_object$model - cor_df <- as.data.frame(stats::cor(d)) - model_output$impact[cov_row] <- round(abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), 3) # r_zy * r_zx - } - model_output <- purrr::modify_if(model_output, is.numeric, round, digits = 3) +for (i in seq(covariate_names)) { + cov_row <- model_output$term == covariate_names[i] + d <- model_object$model + cor_df <- as.data.frame(stats::cor(d)) + model_output$impact[cov_row] <- round( + abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), + 3 + ) # r_zy * r_zx +} + + + model_output <- purrr::modify_if( + model_output, + is.numeric, + round, + digits = 3 +) + return(model_output) } From eef4c4964afdaa42574d653aa65d435c5c425e71 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:18:24 -0500 Subject: [PATCH 04/26] Update helper_plot_correlation.R Manual modification: no more than 80 characters per line of code --- R/helper_plot_correlation.R | 127 +++++++++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 22 deletions(-) diff --git a/R/helper_plot_correlation.R b/R/helper_plot_correlation.R index e622bc66..5a329b60 100644 --- a/R/helper_plot_correlation.R +++ b/R/helper_plot_correlation.R @@ -13,28 +13,111 @@ plot_correlation <- function(r_con, obs_r, critical_r) { p <- ggplot2::ggplot(d, ggplot2::aes_string(x = "x", y = "y")) + - # main arrows - ggplot2::geom_segment(ggplot2::aes(y = .1), xend = 0, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight up - ggplot2::geom_segment(ggplot2::aes(x = .1), xend = 1, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # hypotenuse - ggplot2::geom_segment(ggplot2::aes(x = .15, y = 1), xend = .9, yend = 1, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight across - - # lagel annotations - ggplot2::annotate("text", x = 0, y = 0, label = paste0("Confounding\nVariable"), fontface = 3) + - ggplot2::annotate("text", x = 0, y = 1, label = paste0("Predictor of Interest"), fontface = 3) + - ggplot2::annotate("text", x = 1, y = 1, label = paste0("Outcome"), fontface = 3) + - - # CV arrows - # ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - # ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - ggplot2::geom_curve(ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), curvature = -.35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - ggplot2::geom_curve(ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), curvature = .35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - - ggplot2::geom_segment(ggplot2::aes(x = .37, y = .81), xend = .465, yend = .925, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - - # corr. annotations - ggplot2::annotate("text", x = -.125, y = .5, label = paste0("Rx.cv | Z =\n ", r_con), fontface = 1) + - ggplot2::annotate("text", x = .575, y = .35, label = paste0("Ry.cv | Z =\n", r_con), fontface = 1) + - ggplot2::annotate("text", x = .25, y = .525, label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), fontface = 1) + +# main arrows +ggplot2::geom_segment( + ggplot2::aes(y = .1), + xend = 0, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight up +ggplot2::geom_segment( + ggplot2::aes(x = .1), + xend = 1, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # hypotenuse +ggplot2::geom_segment( + ggplot2::aes(x = .15, y = 1), + xend = .9, + yend = 1, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight across + + +# label annotations +ggplot2::annotate( + "text", + x = 0, + y = 0, + label = paste0("Confounding\nVariable"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 0, + y = 1, + label = paste0("Predictor of Interest"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 1, + y = 1, + label = paste0("Outcome"), + fontface = 3 +) + + + +# CV arrows +# ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, +#yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + +# straight across +# ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, +#yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + + # straight across +ggplot2::geom_curve( + ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), + curvature = -.35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_curve( + ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), + curvature = .35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_segment( + ggplot2::aes(x = .37, y = .81), + xend = .465, + yend = .925, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#1F78B4" +) + # straight across + + +# corr. annotations +ggplot2::annotate( + "text", + x = -.125, + y = .5, + label = paste0("Rx.cv | Z =\n ", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .575, + y = .35, + label = paste0("Ry.cv | Z =\n", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .25, + y = .525, + label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), + fontface = 1 +) + + # plot modifications ggplot2::xlim(-.15, 1.1) + From 64e1584208590285180a2edeed39e0021eac9d30 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:34:00 -0500 Subject: [PATCH 05/26] Update helper_plot_threshold.R Manual modification: no more than 80 characters per line of code --- R/helper_plot_threshold.R | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/R/helper_plot_threshold.R b/R/helper_plot_threshold.R index aa388c5f..63b3de35 100644 --- a/R/helper_plot_threshold.R +++ b/R/helper_plot_threshold.R @@ -27,7 +27,8 @@ plot_threshold <- function(beta_threshold, est_eff) { } else if (est_eff < beta_threshold) { # beta is below threshold dd <- dplyr::tibble(est_eff = est_eff, beta_threshold = beta_threshold) - dd <- dplyr::mutate(dd, `Above Estimated Effect, Below Threshold` = abs(est_eff - beta_threshold)) + dd <- dplyr::mutate( dd,`Above Estimated Effect, Below Threshold` + = abs(est_eff - beta_threshold)) dd <- dplyr::mutate(dd, `Below Threshold` = est_eff) dd <- dplyr::select(dd, -beta_threshold) @@ -43,22 +44,30 @@ plot_threshold <- function(beta_threshold, est_eff) { y_thresh_text <- y_thresh + sqrt(.005 * abs(y_thresh)) effect_text <- est_eff + sqrt(.025 * abs(est_eff)) # y-value of text - cols <- c("lightgray", "#1F78B4") # dark blue and green (green not used right now) + cols <- c("lightgray", "#1F78B4") + # dark blue and green (green not used right now) } p <- ggplot2::ggplot(dd, ggplot2::aes(x = inference, y = val, fill = key)) + ggplot2::geom_col(position = "stack") + ggplot2::geom_hline(yintercept = est_eff, color = "black") + - ggplot2::annotate("text", x = 1, y = effect_text, label = "Estimated Effect") + + ggplot2::annotate("text", x = 1, y = effect_text, + label = "Estimated Effect") + ggplot2::geom_hline(yintercept = y_thresh, color = "red") + - ggplot2::annotate("text", x = 1, y = y_thresh_text, label = "Threshold") + - # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + this is discussed here: https://github.com/jrosen48/konfound/issues/5 + ggplot2::annotate("text", x = 1, y = y_thresh_text, + label = "Threshold") + + # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + + #this is discussed here: https://github.com/jrosen48/konfound/issues/5 ggplot2::scale_fill_manual("", values = cols) + ggplot2::theme_bw() + - ggplot2::theme(axis.text.x = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank()) + + ggplot2::theme( + axis.text.x = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank() +) + + ggplot2::xlab(NULL) + ggplot2::ylab("Effect (abs. value)") + ggplot2::theme(legend.position = "top") From b0e5adc594e19481752075ea741eb8c196427a5f Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:36:31 -0500 Subject: [PATCH 06/26] Update konfound-glm-dichotomous.R Manual modification: no more than 80 characters per line of code --- R/konfound-glm-dichotomous.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/konfound-glm-dichotomous.R b/R/konfound-glm-dichotomous.R index 4aa1287b..6785e23e 100644 --- a/R/konfound-glm-dichotomous.R +++ b/R/konfound-glm-dichotomous.R @@ -1,7 +1,13 @@ # konfound-glm -konfound_glm_dichotomous <- function(model_object, tested_variable_string, test_all, alpha, tails, - to_return, n_treat, switch_trm, replace) { +konfound_glm_dichotomous <- function(model_object, + tested_variable_string, + test_all, + alpha, tails, + to_return, + n_treat, + switch_trm, + replace) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) From 68d4824b3a66c0d56212e176771899f8995c765f Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:51:06 -0500 Subject: [PATCH 07/26] Update konfound-glm.R Manual modification: no more than 80 characters per line of code --- R/konfound-glm.R | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/R/konfound-glm.R b/R/konfound-glm.R index aef0941c..c65cc32e 100644 --- a/R/konfound-glm.R +++ b/R/konfound-glm.R @@ -1,6 +1,12 @@ # konfound-glm -konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, tails, index = "RIR", to_return) { +konfound_glm <- function(model_object, + tested_variable_string, + test_all, + alpha, + tails, + index = "RIR", + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -8,11 +14,18 @@ konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, coef_df <- tidy_output[tidy_output$term == tested_variable_string, ] } else { coef_df <- tidy_output[-1, ] - coef_df$est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + coef_df$est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins( + model_object))$AME) == tested_variable_string]) } # to remove intercept est_eff <- coef_df$estimate - est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins(model_object))$AME) == tested_variable_string]) std_err <- coef_df$std.error n_obs <- glance_output$nobs n_covariates <- glance_output$df.null - glance_output$df.residual @@ -33,11 +46,14 @@ konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") - stop("Multiple variables cannot presently be tested for models fit using glm(); this will be added in the future.") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") + stop("Multiple variables cannot presently be tested + for models fit using glm(); this will be added in the future.") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } From 5fd0bca206229d3097ff74b14a48276bdc588a5e Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:53:19 -0500 Subject: [PATCH 08/26] Update konfound-lm.R Manual modification: no more than 80 characters per line of code --- R/konfound-lm.R | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/R/konfound-lm.R b/R/konfound-lm.R index ff2923b3..f0d9bd1b 100644 --- a/R/konfound-lm.R +++ b/R/konfound-lm.R @@ -1,6 +1,11 @@ # konfound-lm -konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { +konfound_lm <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -31,10 +36,12 @@ konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, t ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } From e2a806b8b23f50ef5203e95a442290c19360bea7 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 00:55:15 -0500 Subject: [PATCH 09/26] Update konfound-lmer.R Manual modification: no more than 80 characters per line of code --- R/konfound-lmer.R | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/R/konfound-lmer.R b/R/konfound-lmer.R index afb773d1..63906eac 100644 --- a/R/konfound-lmer.R +++ b/R/konfound-lmer.R @@ -3,12 +3,18 @@ get_kr_df <- function(model_object) { L <- diag(rep(1, length(lme4::fixef(model_object)))) L <- as.data.frame(L) - out <- suppressWarnings(purrr::map_dbl(L, pbkrtest::get_Lb_ddf, object = model_object)) + out <- suppressWarnings(purrr::map_dbl( + L, pbkrtest::get_Lb_ddf, object = model_object)) names(out) <- names(lme4::fixef(model_object)) out } -konfound_lmer <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { +konfound_lmer <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom.mixed::tidy(model_object) # tidying output if (test_all == FALSE) { From 1c0d2b479aeb5acc81b36e6193330be6c60348a4 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 10:22:18 -0500 Subject: [PATCH 10/26] Update konfound.R some comment need to lines break --- R/konfound.R | 49 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/R/konfound.R b/R/konfound.R index d9b1c3c3..e9ea1a84 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -52,13 +52,18 @@ konfound <- function(model_object, # Stop messages if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { - stop("konfound() is currently implemented for models estimated with lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") + stop("konfound() is currently implemented for models estimated with + lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") } - if ("table" %in% to_return & test_all == TRUE) stop("cannot return a table when test_all is set to TRUE") - + if ("table" %in% to_return & test_all == TRUE){ + stop("cannot return a table when test_all is set to TRUE") + } # Dealing with non-standard evaluation - tested_variable_enquo <- rlang::enquo(tested_variable) # dealing with non-standard evaluation (so unquoted names for tested_variable can be used) + + # dealing with non-standard evaluation + #(so unquoted names for tested_variable can be used) + tested_variable_enquo <- rlang::enquo(tested_variable) tested_variable_string <- rlang::quo_name(tested_variable_enquo) # Dispatching based on class @@ -81,9 +86,14 @@ konfound <- function(model_object, } if (inherits(model_object, "glm") & two_by_two == FALSE) { - message("Note that for a non-linear model, impact threshold should not be interpreted.") - message("Note that this is only an approximation. For exact results in terms of the number of cases that must be switched from treatment success to treatment failure to invalidate the inference see: https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") - message("If a dichotomous independent variable is used, consider using the 2X2 table approach enabled with the argument two_by_two = TRUE") + message("Note that for a non-linear model, + impact threshold should not be interpreted.") + message("Note that this is only an approximation. For exact results + in terms of the number of cases that must be switched from treatment + success to treatment failure to invalidate the inference see: + https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") + message("If a dichotomous independent variable is used, consider using + the 2X2 table approach enabled with the argument two_by_two = TRUE") output <- konfound_glm( model_object = model_object, tested_variable_string = tested_variable_string, @@ -98,9 +108,13 @@ konfound <- function(model_object, if (inherits(model_object, "glm") & two_by_two == TRUE) { - if(is.null(n_treat)) stop("Please provide a value for n_treat to use this functionality with a dichotomous predictor") - if (test_all == TRUE) stop("test_all = TRUE is not supported when two_by_two is specified") - + if(is.null(n_treat)){ + stop("Please provide a value for + n_treat to use this functionality with a dichotomous predictor") + } + if (test_all == TRUE) { + stop("test_all = TRUE is not supported when two_by_two is specified") + } output <- konfound_glm_dichotomous( model_object = model_object, tested_variable_string = tested_variable_string, @@ -128,7 +142,14 @@ konfound <- function(model_object, to_return = to_return ) - message("Note that the Kenward-Roger approximation is used to estimate degrees of freedom for the predictor(s) of interest. We are presently working to add other methods for calculating the degrees of freedom for the predictor(s) of interest. If you wish to use other methods now, consider those detailed here: https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. You can then enter degrees of freedom obtained from another method along with the coefficient, number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") + message("Note that the Kenward-Roger approximation is used to + estimate degrees of freedom for the predictor(s) of interest. + We are presently working to add other methods for calculating + the degrees of freedom for the predictor(s) of interest. + If you wish to use other methods now, consider those detailed here: + https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. + You can then enter degrees of freedom obtained from another method along with the coefficient, + number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") return(output) } @@ -138,8 +159,10 @@ konfound <- function(model_object, } if (test_all == FALSE) { - message("To consider other predictors of interest, consider setting `test_all` to TRUE.") + message("To consider other predictors of interest, + consider setting `test_all` to TRUE.") } else { - message("Note that presently these predictors of interest are tested independently; future output may use the approach used in mkonfound.") + message("Note that presently these predictors of interest are tested + independently; future output may use the approach used in mkonfound.") } } From 470657531ec1b087433d3b51c4863955285b860b Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Mon, 20 Nov 2023 22:59:00 -0500 Subject: [PATCH 11/26] Update mkonfound.R Manual modification --- R/mkonfound.R | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/R/mkonfound.R b/R/mkonfound.R index 5f926de3..5bed869e 100644 --- a/R/mkonfound.R +++ b/R/mkonfound.R @@ -1,13 +1,19 @@ #' Perform meta-analyses including sensitivity analysis -#' @description For fitted models, this command carries out sensitivity analysis for a number of models, when their parameters stored in a data.frame. -#' @param d data.frame or tibble with the t-statistics and associated degrees of freedom +#' @description For fitted models, this command carries out sensitivity analysis +#' for a number of models, when their parameters stored in a data.frame. +#' @param d data.frame or tibble with the t-statistics and +#' associated degrees of freedom #' @param t t-statistic or vector of t-statistics -#' @param df degrees of freedom or vector of degrees of freedom associated with the t-statistics in the t argument -#' @param return_plot whether to return a plot of the percent bias; defaults to FALSE +#' @param df degrees of freedom or vector of degrees of +#' freedom associated with the t-statistics in the t argument +#' @param return_plot whether to return a plot of the percent bias; +#' defaults to FALSE #' @inheritParams pkonfound #' @import rlang #' @import dplyr -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference for each of the cases in the data.frame +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference +#' for each of the cases in the data.frame #' @examples #' \dontrun{ #' mkonfound_ex @@ -25,18 +31,23 @@ mkonfound <- function(d, t, df, alpha = .05, tails = 2, return_plot = FALSE) { df_vec <- pull(select(d, !!df_enquo)) if (length(t_vec) <= 1) { - stop("To carry out sensitivity analysis for a single study, use pkonfound()") + stop("To carry out sensitivity analysis + for a single study, use pkonfound()") } - results_df <- suppressWarnings(purrr::map2_dfr(.x = t_vec, .y = df_vec, .f = core_sensitivity_mkonfound)) - + results_df <- suppressWarnings( + purrr::map2_dfr( + .x = t_vec, .y = df_vec, .f = core_sensitivity_mkonfound) +) if (return_plot == TRUE) { results_df$action <- dplyr::case_when( results_df$action == "to_invalidate" ~ "To Invalidate", results_df$action == "to_sustain" ~ "To Sustain" ) - p <- ggplot2::ggplot(results_df, ggplot2::aes_string(x = "pct_bias_to_change_inference", fill = "action")) + + p <- ggplot2::ggplot(results_df, ggplot2::aes_string( + x = "pct_bias_to_change_inference", fill = "action" +)) + ggplot2::geom_histogram() + ggplot2::scale_fill_manual("", values = c("#1F78B4", "#A6CEE3")) + ggplot2::theme_bw() + @@ -86,8 +97,15 @@ core_sensitivity_mkonfound <- function(t, df, alpha = .05, tails = 2) { r_con <- round(sqrt(abs(itcv)), 3) out <- dplyr::data_frame(t, df, action, inference, pct_bias, itcv, r_con) - names(out) <- c("t", "df", "action", "inference", "pct_bias_to_change_inference", "itcv", "r_con") - out$pct_bias_to_change_inference <- round(out$pct_bias_to_change_inference, 3) + names(out) <- c("t", + "df", + "action", + "inference", + "pct_bias_to_change_inference", + "itcv", + "r_con") + out$pct_bias_to_change_inference <- round( + out$pct_bias_to_change_inference, 3) out$itcv <- round(out$itcv, 3) out$action <- as.character(out$action) out$inference <- as.character(out$inference) From c4cc779996cd7d91f19afab00631281b0d9dc76c Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:12:21 -0500 Subject: [PATCH 12/26] Update nonlinear_auxiliary.R All manual modifications, except for the calculation formula --- R/nonlinear_auxiliary.R | 548 +++++++++++++++++++++++++++------------- 1 file changed, 371 insertions(+), 177 deletions(-) diff --git a/R/nonlinear_auxiliary.R b/R/nonlinear_auxiliary.R index 6b60f6b6..9ba87925 100644 --- a/R/nonlinear_auxiliary.R +++ b/R/nonlinear_auxiliary.R @@ -8,7 +8,7 @@ isinvalidate <- function(thr_t, ob_t) { return(x) } -# to evaluate what kind of switches we need - increase or decrease the odds ratio +#to evaluate what kind of switches we need-increase or decrease the odds ratio isdcroddsratio <- function(thr_t, ob_t) { if (thr_t < ob_t) { x <- T @@ -18,7 +18,8 @@ isdcroddsratio <- function(thr_t, ob_t) { return(x) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - first solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - first solution get_a1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a1 <- -(1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (2 * n_trm * (-1 + odds_ratio) * odds_ratio + n_trm^2 * odds_ratio * std_err^2 - @@ -28,7 +29,8 @@ get_a1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(a1) } -# get the c cell (treatment failure) for given odds_ratio, se and treatment cases - first solution +# get the c cell (treatment failure) for given odds_ratio, +# se and treatment cases - first solution get_c1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { c1 <- -((-2 * n_trm + 2 * n_trm * odds_ratio - n_obs * n_trm * odds_ratio * std_err^2 + n_trm^2 * odds_ratio * std_err^2 + @@ -38,7 +40,8 @@ get_c1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(c1) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - second solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - second solution get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a2 <- (1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (-2 * n_trm * (-1 + odds_ratio) * odds_ratio - n_trm^2 * odds_ratio * std_err^2 + @@ -48,7 +51,8 @@ get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(a2) } -# get the c cell (treatment failure) for given odds_ratio, se and treatment cases - second solution +# get the c cell (treatment failure) for given odds_ratio, +# se and treatment cases - second solution get_c2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { c2 <- (2 * n_trm - 2 * n_trm * odds_ratio + n_obs * n_trm * odds_ratio * std_err^2 - n_trm^2 * odds_ratio * std_err^2 + @@ -75,8 +79,10 @@ taylorexp <- function(a, b, c, d, q, thr) { d1square <- Num1 / Den1 - Num2 / Den2 # d1unsquare is the first derivative of the unsquared term d1unsquare <- d1square / (2 * log(a * (d - q) / (b * (c + q))) / sqrt(1 / a + 1 / b + 1 / (c + q) + 1 / (d - q))) - # x is the number of cases need to be replaced solved based on the taylor expansion - # this is the (linear approximation) of the original/unsquared term around the value of q + # x is the number of cases need to be replaced + # solved based on the taylor expansion + # this is the (linear approximation) of the + # original/unsquared term around the value of q x <- (thr - log(a * (d - q) / (b * (c + q))) / sqrt((1 / a + 1 / b + 1 / (c + q) + 1 / (d - q)))) / d1unsquare + q return(x) } @@ -129,7 +135,9 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0 + ) } if (t_start > thr_t) { # transfer cases from A to B or D to C to decrease odds ratio @@ -139,7 +147,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) } ### run following if transfering one row is enough @@ -165,20 +174,23 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -205,9 +217,11 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { t_loop <- t_taylor } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -263,7 +277,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { est_eff_final <- log(a_final * d_final / (b_final * c_final)) std_err_final <- sqrt(1 / a_final + 1 / b_final + 1 / c_final + 1 / d_final) t_final <- est_eff_final / std_err_final - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) if (switch_trm == allnotenough) { final <- abs(a - a_final) + as.numeric(allnotenough) * abs(c - c_final) } else { @@ -289,9 +304,12 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### return result result <- list( - final_switch = final, table_start = table_start, table_final = table_final, est_eff_start = est_eff_start, - est_eff_final = est_eff_final, std_err_start = std_err_start, std_err_final = std_err_final, - t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, perc_bias_pred = perc_bias_pred, + final_switch = final, table_start = table_start, + table_final = table_final, est_eff_start = est_eff_start, + est_eff_final = est_eff_final, std_err_start = std_err_start, + std_err_final = std_err_final, + t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, step = step, needtworows = allnotenough, final_extra = final_extra ) @@ -354,11 +372,13 @@ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- chisq_p(a, b, c, d) chisq_ob <- chisq_value(a, b, c, d) -# to evaluate whether we are moving cases to invalidate or sustain the inference +# to evaluate whether we are moving cases to +# invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} -# to evaluate what kind of switches we need - increase or decrease the odds ratio +# to evaluate what kind of switches we need-increase +#or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -388,7 +408,8 @@ if (!dcroddsratio_start) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_p <- chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -398,7 +419,8 @@ if (dcroddsratio_start ) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_p <- chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) } ### run following if transfering one row is enough @@ -424,20 +446,23 @@ if (!allnotenough) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many + ### and causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -465,9 +490,11 @@ if (!allnotenough) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } -### when we need to transfer two rows the previously defined tryall are the starting point for brute force +### when we need to transfer two rows the previously defined +### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point + ### and call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -487,8 +514,10 @@ if (t_loop < thr_t) { } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -508,8 +537,10 @@ if (t_loop > thr_t) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -522,15 +553,21 @@ if (t_loop > thr_t) { p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) -### start 2nd round brute force - use fisher test p value as evaluation criterion -#### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p +### start 2nd round brute force - use fisher +###test p value as evaluation criterion +#### scenario 1 need to reduce odds ratio to +###invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- + d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -540,41 +577,63 @@ if (isinvalidate_start & dcroddsratio_start){ } if (p_loopsec > thr_p){ #taylor too much, return some odds ratio while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p +#### scenario 2 need to reduce odds ratio to +#### sustain the inference-need to reduce p if (!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- chisq_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec @@ -583,14 +642,20 @@ if (!isinvalidate_start & dcroddsratio_start) { } } -#### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p +#### scenario 3 need to increase odds ratio to +#### invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -598,29 +663,42 @@ if (isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p +#### scenario 4 need to increase odds ratio +#### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -628,23 +706,34 @@ if (!isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: -table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) +table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2 +) p_final <- chisq_p(a_final, b_final, c_final, d_final) chisq_final <- chisq_value(a_final, b_final, c_final, d_final) @@ -669,11 +758,18 @@ if (allnotenough) { total_switch <- final + allnotenough*final_extra -result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, chisq_final = chisq_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = total_switch, isinvalidate_ob = isinvalidate_ob) +result <- list(final_switch = final, + User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + chisq_final = chisq_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) } @@ -693,11 +789,13 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- fisher_p(a, b, c, d) - # to evaluate whther we are moving cases to invalidate or sustain the inference + # to evaluate whther we are moving cases + # to invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} - # to evaluate what kind of switches we need - increase or decrease the odds ratio + # to evaluate what kind of switches + # we need-increase or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -732,7 +830,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0 + ) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -746,7 +846,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0 + ) } ### run following if transfering one row is enough @@ -772,20 +874,25 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor < 0) { b_taylor <- a_taylor + b_taylor a_taylor <- 0 @@ -813,9 +920,11 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -839,8 +948,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -864,8 +975,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -878,59 +991,94 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) - ### start 2nd round brute force - use fisher test p value as evaluation criterion - #### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p + ### start 2nd round brute force - use fisher test + ### p value as evaluation criterion + #### scenario 1 need to reduce odds ratio to + ### invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){ #taylor too much, return some odds ratio + #taylor too much, return some odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p + #### scenario 2 need to reduce odds ratio + ### to sustain the inference-need to reduce p if (!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec @@ -939,68 +1087,108 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } } - #### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p + #### scenario 3 need to increase odds + ### ratio to invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p + #### scenario 4 need to increase odds ratio + #### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2 + ) p_final <- fisher_p(a_final, b_final, c_final, d_final) fisher_final <- fisher_oddsratio(a_final, b_final, c_final, d_final) @@ -1025,11 +1213,17 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ total_switch <- final + allnotenough*final_extra - result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, fisher_final = fisher_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = total_switch, isinvalidate_ob = isinvalidate_ob) + result <- list(final_switch = final, User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + fisher_final = fisher_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) -} \ No newline at end of file +} From 50da8c90f574cedb3e1de9753be515bfb23ed5cd Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:22:20 -0500 Subject: [PATCH 13/26] Update pkonfound.R Manual modification --- R/pkonfound.R | 85 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 26 deletions(-) diff --git a/R/pkonfound.R b/R/pkonfound.R index b2146218..19d699d5 100644 --- a/R/pkonfound.R +++ b/R/pkonfound.R @@ -1,34 +1,59 @@ #' Perform sensitivity analysis for published studies -#' @description For published studies, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. -#' @param est_eff the estimated effect (such as an unstandardized beta coefficient or a group mean difference) -#' @param std_err the standard error of the estimate of the unstandardized regression coefficient +#' @description For published studies, this command calculates +#' (1) how much bias there must be in an estimate to invalidate/sustain +#' an inference; (2) the impact of an omitted variable necessary to +#' invalidate/sustain an inference for a regression coefficient. +#' @param est_eff the estimated effect (such as an unstandardized beta +#' coefficient or a group mean difference) +#' @param std_err the standard error of the estimate of the unstandardized +#' regression coefficient #' @param n_obs the number of observations in the sample #' @param n_covariates the number of covariates in the regression model #' @param alpha probability of rejecting the null hypothesis (defaults to 0.05) -#' @param tails integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2) -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param nu what hypothesis to be tested; defaults to testing whether est_eff is significantly different from 0 -#' @param model_type the type of model being estimated; defaults to "ols" for a linear regression model; the other option is "logistic" -#' @param n_treat the number of cases associated with the treatment condition; applicable only when model_type = "logistic" -#' @param switch_trm whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic" -#' @param a cell is the number of cases in the control group showing unsuccessful results -#' @param b cell is the number of cases in the control group showing successful results -#' @param c cell is the number of cases in the treatment group showing unsuccessful results -#' @param d cell is the number of cases in the treatment group showing successful results -#' @param two_by_two_table table that is a matrix or can be coerced to one (data.frame, tibble, tribble) from which the a, b, c, and d arguments can be extracted -#' @param test whether using Fisher's Exact Test or A chi-square test; defaults to Fisher's Exact Test -#' @param replace whether using entire sample or the control group to calculate the base rate; default is the control group +#' @param tails integer whether hypothesis testing is one-tailed (1) or +#' two-tailed (2; defaults to 2) +#' @param index whether output is RIR or IT (impact threshold); +#' defaults to "RIR" +#' @param nu what hypothesis to be tested; defaults to testing whether +#' est_eff is significantly different from 0 +#' @param model_type the type of model being estimated; defaults to "ols" for +#' a linear regression model; the other option is "logistic" +#' @param n_treat the number of cases associated with the treatment condition; +#' applicable only when model_type = "logistic" +#' @param switch_trm whether to switch the treatment and control cases; defaults +#' to FALSE; applicable only when model_type = "logistic" +#' @param a cell is the number of cases in the control group showing +#' unsuccessful results +#' @param b cell is the number of cases in the control group showing successful +#' results +#' @param c cell is the number of cases in the treatment group showing +#' unsuccessful results +#' @param d cell is the number of cases in the treatment group showing +#' successful results +#' @param two_by_two_table table that is a matrix or can be coerced to one +#' (data.frame, tibble, tribble) from which the a, b, c, and d arguments can +#' be extracted +#' @param test whether using Fisher's Exact Test or A chi-square test; defaults +#' to Fisher's Exact Test +#' @param replace whether using entire sample or the control group to calculate +#' the base rate; default is the control group #' @param sdx the standard deviation of X #' @param sdy the standard deviation of Y #' @param R2 the unadjusted,original R2 in the observed function #' @param eff_thr unstandardized coefficient threshold to change an inference -#' @param FR2max the largest R2, or R2max, in the final model with unobserved confounder -#' @param FR2max_multiplier the multiplier of R2 to get R2max, default is set to 1.3 -#' @param to_return whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return +#' @param FR2max the largest R2, or R2max, in the final model with unobserved +#' confounder +#' @param FR2max_multiplier the multiplier of R2 to get R2max, +#' default is set to 1.3 +#' @param to_return whether to return a data.frame +#' (by specifying this argument to equal "raw_output" for use in other analyses) +#' or a plot ("plot"); default is to print ("print") the output to the console; +#' can specify a vector of output to return #' @importFrom stats fisher.test #' @import lavaan #' @import ggplot2 -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference #' @examples #' # using pkonfound for linear models #' pkonfound(2, .4, 100, 3) @@ -62,9 +87,11 @@ # pkonfound(two_by_two_table = my_table) # #' # use pkonfound to calculate delta* and delta_exact -#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") +#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, +#' eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") #' # use pkonfound to calculate rxcv and rycv when preserving standard error -#' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") +#' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, +#' sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") #' @export pkonfound <- function(est_eff, @@ -92,7 +119,8 @@ FR2max, FR2max_multiplier = 1.3, to_return = "print") { - if ("table" %in% to_return) stop("a table can only be output when using konfound") + if ("table" %in% to_return) stop("a table can only be + output when using konfound") if (index == "COP") { @@ -139,7 +167,10 @@ ) } else if (model_type == "logistic" & is.null(n_treat)) { - warning("For a logistic regression, the log-odds coefficients are used for the pkonfound() calculations. To carry out an analysis using average marginal effects, you can use the konfound() function with the results of a model estimated in R.") + warning("For a logistic regression, the log-odds coefficients + are used for the pkonfound() calculations. To carry out an + analysis using average marginal effects, you can use the + konfound() function with the results of a model estimated in R.") out <- test_sensitivity_ln( est_eff = est_eff, @@ -157,7 +188,8 @@ } else if(!is.null(a)) { # error handling if (is.null(a) | is.null(b) | is.null(c) | is.null(d)) { - stop("Please enter values for a, b, c, and d to use the 2 x 2 table functionality") + stop("Please enter values for a, b, c, + and d to use the 2 x 2 table functionality") } out <- tkonfound(a = a, @@ -208,7 +240,8 @@ if (!is.null(out)) { # dealing with a strange print issue } if (to_return == "print") { - message("For other forms of output, run ?pkonfound and inspect the to_return argument") + message("For other forms of output, run ? + pkonfound and inspect the to_return argument") } message("For models fit in R, consider use of konfound().") From 76eb2a6ce939735e32f606ea5fbaadb7ebd84eef Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:27:32 -0500 Subject: [PATCH 14/26] Update test_cop.R Manual modification --- R/test_cop.R | 720 ++++++++++++++++++++++++++++----------------------- 1 file changed, 401 insertions(+), 319 deletions(-) diff --git a/R/test_cop.R b/R/test_cop.R index 5f94ff95..f07824f7 100644 --- a/R/test_cop.R +++ b/R/test_cop.R @@ -10,327 +10,409 @@ test_cop <- function(est_eff, # unstandardized R2, # NOT the adjusted R2, should be the original R2 eff_thr = 0, # this is the unstandardized version FR2max_multiplier = 1.3, - FR2max = 0, # NOT the adjusted R2, should be the original R2 + FR2max = 0, #NOT the adjusted R2, should be the original R2 alpha = 0.05, tails = 2, to_return = to_return){ - - ## test example - # est_eff = .125 - # std_err = .050 - # n_obs = 6174 - # n_covariates = 7 - # sdx = .217 - # sdy = .991 - # R2 = .251 - # eff_thr = 0 - # FR2max = .61 - # test_cop(est_eff = .4, std_err = .1, n_obs = 290, - # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short") - - ## prepare input - df = n_obs - n_covariates - 3 ## df of M3 - var_x = sdx^2 - var_y = sdy^2 - - ### if the user specifies R2max directly then we use the specified R2max - if (FR2max == 0){FR2max = FR2max_multiplier * R2} - var_z = sdz = 1 - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} - if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")} - if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} - - negest <- 0 # an indicator of whether the use specified est_eff is negative, 1 is yes negative - if (est_eff < 0) { - est_eff <- abs(est_eff) - negest <- 1 - } - - ## now standardize - beta_thr = eff_thr * sdx / sdy - beta = est_eff * sdx / sdy - SE = std_err * sdx / sdy - - ## observed regression, reg y on x Given z - tyxGz = beta / SE ### this should be equal to est_eff / std_err - ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) - ## df + 1 because omitted variable is NOT included in M2 - ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) - ## ryxGz_M2 is only for simulation to recover the exact number - - ## make sure R2 due to x alone is not larger than overall or observed R2 - if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} - - ## calculate ryz, rxz, rxy - ryz = rzy = cal_ryz(ryxGz, R2) - rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) - ## df + 1 because omitted variable is NOT included in M2 - #### we change the n_obs to df to recover the rxz as in the particular sample - - ## note that in the updated approach rxy is not necessary to calculate rxcv_exact, ryxcv_exact and delta_exact - rxy = ryx = cal_rxy(ryxGz, rxz, ryz) - rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) - ## rxy_M2 is only for simulation to recover the exact number - - ## baseline regression model, no z (unconditional) - eff_uncond = sqrt((var_y / var_x)) * rxy - t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) - ## n_obs - 2 - adjust the df in the M1 - std_err_uncond = eff_uncond / t_uncond - R2_uncond = rxy^2 - - ## calculate delta_star - delta_star = cal_delta_star(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y, eff_uncond, rxz, n_obs) - - ## now introduce cv - sdcv = var_cv = 1 - rcvz = rzcv = 0 - v = 1 - rxz^2 # this is to simplify calculation later - D = sqrt(FR2max - R2) # same above - - ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) - rxcv_oster = rcvx_oster = delta_star * rxz * (sdcv / sdz) - if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) - {rcvy_oster = rycv_oster = - D * sqrt(1 - (rcvx_oster^2 / v)) + - (ryx * rcvx_oster) / (v) - - (ryz * rcvx_oster * rxz) / (v)} - - # Checking beta, R2, se generated by delta_star with a regression - verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) - - # prepare some other values in the final Table (long output) - R2_M3_oster = as.numeric(verify_oster[1]) - eff_x_M3_oster = as.numeric(verify_oster[2]) # should be equivalent or very close to eff_thr - se_x_M3_oster = as.numeric(verify_oster[3]) - beta_x_M3_oster = as.numeric(verify_oster[9]) # should be equivalent or very close to beta_thr - t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster - eff_z_M3_oster = as.numeric(verify_oster[4]) - se_z_M3_oster = as.numeric(verify_oster[5]) - eff_cv_M3_oster = as.numeric(verify_oster[6]) - se_cv_M3_oster = as.numeric(verify_oster[7]) - cov_oster = verify_oster[[11]] - cor_oster = verify_oster[[12]] - - ## calculate the exact/true rcvx, rcvy, delta (updated version that does not need rxy) - ### the idea is to calculate everything conditional on z - sdxGz = sdx * sqrt(1 - rxz^2) - sdyGz = sdy * sqrt(1 - ryz^2) - ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) - ### equation 7 in the manuscript - rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / - sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - - 2 * ryxGz * sdxGz / sdyGz * beta_thr + - ryxcvGz_exact_sq) - ### equation 6 in the manuscript - rycvGz_exact = ryxGz * rxcvGz_exact + - sqrt((ryxcvGz_exact_sq - ryxGz^2) * - (1 - rxcvGz_exact^2)) - ### now get unconditional exact rxcv and rycv - rycv_exact = sqrt(1 - ryz^2) * rycvGz_exact - rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact - delta_exact = rxcv_exact / rxz - - ## previous approach - comment out, but could find in cop_pse_auxiliary - ## exact_result = cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) - ## rxcv_exact = rcvx_exact = as.numeric(exact_result[1]) - ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) - ## delta_exact = as.numeric(exact_result[3]) - - # Checking beta, R2, se generated by True/Exact Delta with a regression - verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) - ### verify_exact[1] == verify_exact[4] == FR2max - ### verify_exact[2] == eff_thr - ### verify_exact[5] == beta_thr - - # calculate % bias in delta comparing oster's delta_star with true delta - delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact - - # prepare some other values in the final Table (long output) - R2_M3 = as.numeric(verify_exact[1]) - eff_x_M3 = as.numeric(verify_exact[2]) # should be equivalent or very close to eff_thr - se_x_M3 = as.numeric(verify_exact[3]) - beta_x_M3 = as.numeric(verify_exact[9]) # should be equivalent or very close to beta_thr - t_x_M3 = eff_x_M3 / se_x_M3 - eff_z_M3 = as.numeric(verify_exact[4]) - se_z_M3 = as.numeric(verify_exact[5]) - eff_cv_M3 = as.numeric(verify_exact[6]) - se_cv_M3 = as.numeric(verify_exact[7]) - cov_exact = verify_exact[[11]] - cor_exact = verify_exact[[12]] - - verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) - R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err - eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) - se_z_M2 = as.numeric(verify_pse_reg_M2[5]) - t_x_M2 = eff_x_M2 / se_x_M2 - - verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx - se_x_M1 = as.numeric(verify_pse_reg_M1[3]) - t_x_M1 = eff_x_M1 / se_x_M1 - - delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - est_eff))* - ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) - - fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, beta_x_M3_oster, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models - # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, # reg coef for Z in three reg models - # NA, se_z_M2, se_z_M3, se_z_M3_oster, # se for Z in three reg models - # NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models, - NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models - NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models - nrow = 8, ncol = 4, byrow = T) - - rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", - # "coef_Z", "SE_Z", "t_Z", - "coef_CV", "SE_CV", "t_CV") - - colnames(fTable) <- c("M1:X", "M2:X,Z", - "M3(delta_exact):X,Z,CV", - "M3(delta*):X,Z,CV") - - ## figure - figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", - "Intermediate(M2)", eff_x_M2, R2, "exact", - "Final(M3)", eff_x_M3, FR2max, "exact", - "Intermediate(M2)", eff_x_M2, R2, "star", - "Final(M3)", eff_x_M3_oster, FR2max, "star"), nrow = 5, ncol = 4, byrow = T) - colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") - - figTable <- as.data.frame(figTable) - figTable$ModelLabel <- as.character(figTable$ModelLabel) - figTable$ModelLabel <- factor(figTable$ModelLabel, - levels = c("Baseline(M1)", - "Intermediate(M2)", - "Final(M3)")) - figTable$cat <- as.character(figTable$cat) - figTable$cat <- factor(figTable$cat, - levels = c("exact", - "star")) - figTable$coef_X <- as.numeric(figTable$coef_X) - figTable$R2 <- as.numeric(figTable$R2) - -scale = 1/(round(max(figTable$coef_X)*10)/10) - -fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + - ggplot2::geom_point(ggplot2::aes(y = coef_X, group = cat, shape = cat), color = "blue", size = 3) + - ggplot2::scale_shape_manual(values = c(16, 1)) + - ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + - # scale_linetype_manual(values=c("solid", "dotted")) + - ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") + - ggplot2::geom_line(ggplot2::aes(y = coef_X, group = cat, linetype = cat), color = "blue") + - ggplot2::scale_y_continuous( - # Features of the first axis - name = "Coeffcient (X)", - # Add a second axis and specify its features - sec.axis = ggplot2::sec_axis(~.* scale, - name="R2")) + - ggplot2::theme(axis.title.x = ggplot2::element_blank(), - legend.position = "none", - axis.line.y.right = ggplot2::element_line(color = "#7CAE00"), - axis.title.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.text.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.line.y.left = ggplot2::element_line(color = "blue"), - axis.title.y.left = ggplot2::element_text(color = "blue"), - axis.text.y.left = ggplot2::element_text(color = "blue"), - axis.line.x.bottom = ggplot2::element_line(color = "black"), - axis.text.x.bottom = ggplot2::element_text(color = "black")) - - ############################################## - ######### conditional RIR #################### - - # calculating critical r - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1 - } else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) - } - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) - - # final solutions - cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / - (R2 - ryz^2 + ryz^2 * critical_r^2) - cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs - - cond_RIRpi_null <- 1 - sqrt(critical_r^2 / - (R2 - ryz^2 + ryz^2 * critical_r^2)) - cond_RIR_null <- cond_RIRpi_null * n_obs - - cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / - (R2 - ryz^2)) - cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs - - ############################################## - ######### output ############################# - - if (to_return == "raw_output") { - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## test example + # est_eff = .125 + # std_err = .050 + # n_obs = 6174 + # n_covariates = 7 + # sdx = .217 + # sdy = .991 + # R2 = .251 + # eff_thr = 0 + # FR2max = .61 + # test_cop(est_eff = .4, std_err = .1, n_obs = 290, + # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, + # FR2max = .8, to_return = "short") + + ## prepare input + df = n_obs - n_covariates - 3 ## df of M3 + var_x = sdx^2 + var_y = sdy^2 + + ### if the user specifies R2max directly then we use the specified R2max + if (FR2max == 0){FR2max = FR2max_multiplier * R2} + var_z = sdz = 1 + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater + than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to + be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.") + } + if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs + to be greater than R2.")} + if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! Entered values produced Rxz^2 <=0, consider + adding more significant digits to your entered values.") + } + # an indicator of whether the use specified est_eff is + # negative, 1 is yes negative + negest <- 0 + if (est_eff < 0) { + est_eff <- abs(est_eff) + negest <- 1 } - output <- list("delta*" = delta_star, - "delta*restricted" = delta_star_restricted, - "delta_exact" = delta_exact, - "delta_pctbias" = delta_pctbias, - #"cov_oster" = cov_oster, - #"cov_exact" = cov_exact, - "cor_oster" = cor_oster, - "cor_exact" = cor_exact, - "var(Y)" = sdy^2, - "var(X)" = sdx^2, - #"var(Z)" = sdz^2, - "var(CV)" = sdcv^2, - "Table" = fTable, - "Figure" = fig, - "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, - "conditional RIR (fixed y)" = cond_RIR_fixedY, - "conditional RIR pi (null)" = cond_RIRpi_null, - "conditional RIR (null)" = cond_RIR_null, - "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, - "conditional RIR (rxyGz)" = cond_RIR_rxyz) - return(output) - } - - if (to_return == "print") { - cat("This function calculates delta* and the exact value of delta.") - cat("\n") - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## now standardize + beta_thr = eff_thr * sdx / sdy + beta = est_eff * sdx / sdy + SE = std_err * sdx / sdy + + ## observed regression, reg y on x Given z + tyxGz = beta / SE ### this should be equal to est_eff / std_err + ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) + ## df + 1 because omitted variable is NOT included in M2 + ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) + ## ryxGz_M2 is only for simulation to recover the exact number + + ## make sure R2 due to x alone is not larger than overall or observed R2 + if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} + + ## calculate ryz, rxz, rxy + ryz = rzy = cal_ryz(ryxGz, R2) + rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) + ## df + 1 because omitted variable is NOT included in M2 + ### we change the n_obs to df to recover the rxz as in the particular sample + + ## note that in the updated approach rxy is not necessary to calculate + ## rxcv_exact, ryxcv_exact and delta_exact + rxy = ryx = cal_rxy(ryxGz, rxz, ryz) + rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) + ## rxy_M2 is only for simulation to recover the exact number + + ## baseline regression model, no z (unconditional) + eff_uncond = sqrt((var_y / var_x)) * rxy + t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) + ## n_obs - 2 - adjust the df in the M1 + std_err_uncond = eff_uncond / t_uncond + R2_uncond = rxy^2 + + ## calculate delta_star + delta_star = cal_delta_star(FR2max, + R2, + R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + eff_uncond, + rxz, + n_obs) + + ## now introduce cv + sdcv = var_cv = 1 + rcvz = rzcv = 0 + v = 1 - rxz^2 # this is to simplify calculation later + D = sqrt(FR2max - R2) # same above + + ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) + rxcv_oster = rcvx_oster = delta_star * rxz * (sdcv / sdz) + if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) + {rcvy_oster = rycv_oster = + D * sqrt(1 - (rcvx_oster^2 / v)) + + (ryx * rcvx_oster) / (v) - + (ryz * rcvx_oster * rxz) / (v)} + + # Checking beta, R2, se generated by delta_star with a regression + verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) + + # prepare some other values in the final Table (long output) + R2_M3_oster = as.numeric(verify_oster[1]) + + eff_x_M3_oster = as.numeric(verify_oster[2]) + # should be equivalent or very close to eff_thr + se_x_M3_oster = as.numeric(verify_oster[3]) + beta_x_M3_oster = as.numeric(verify_oster[9]) + # should be equivalent or very close to beta_thr + t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster + eff_z_M3_oster = as.numeric(verify_oster[4]) + se_z_M3_oster = as.numeric(verify_oster[5]) + eff_cv_M3_oster = as.numeric(verify_oster[6]) + se_cv_M3_oster = as.numeric(verify_oster[7]) + cov_oster = verify_oster[[11]] + cor_oster = verify_oster[[12]] + + ## calculate the exact/true rcvx, rcvy, + ## delta (updated version that does not need rxy) + ### the idea is to calculate everything conditional on z + sdxGz = sdx * sqrt(1 - rxz^2) + sdyGz = sdy * sqrt(1 - ryz^2) + ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) + ### equation 7 in the manuscript + rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / + sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - + 2 * ryxGz * sdxGz / sdyGz * beta_thr + + ryxcvGz_exact_sq) + ### equation 6 in the manuscript + rycvGz_exact = ryxGz * rxcvGz_exact + + sqrt((ryxcvGz_exact_sq - ryxGz^2) * + (1 - rxcvGz_exact^2)) + ### now get unconditional exact rxcv and rycv + rycv_exact = sqrt(1 - ryz^2) * rycvGz_exact + rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact + delta_exact = rxcv_exact / rxz + + ## previous approach - comment out, but could find in cop_pse_auxiliary + ## exact_result = cal_delta_exact + ## (ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) + ## rxcv_exact = rcvx_exact = as.numeric(exact_result[1]) + ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) + ## delta_exact = as.numeric(exact_result[3]) + + # Checking beta, R2, se generated by True/Exact Delta with a regression + verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) + ### verify_exact[1] == verify_exact[4] == FR2max + ### verify_exact[2] == eff_thr + ### verify_exact[5] == beta_thr + + # calculate % bias in delta comparing oster's delta_star with true delta + delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact + + # prepare some other values in the final Table (long output) + R2_M3 = as.numeric(verify_exact[1]) + eff_x_M3 = as.numeric(verify_exact[2]) + # should be equivalent or very close to eff_thr + se_x_M3 = as.numeric(verify_exact[3]) + beta_x_M3 = as.numeric(verify_exact[9]) + # should be equivalent or very close to beta_thr + t_x_M3 = eff_x_M3 / se_x_M3 + eff_z_M3 = as.numeric(verify_exact[4]) + se_z_M3 = as.numeric(verify_exact[5]) + eff_cv_M3 = as.numeric(verify_exact[6]) + se_cv_M3 = as.numeric(verify_exact[7]) + cov_exact = verify_exact[[11]] + cor_exact = verify_exact[[12]] + + verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) + R2_M2 = as.numeric(verify_pse_reg_M2[1]) + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err + eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) + se_z_M2 = as.numeric(verify_pse_reg_M2[5]) + t_x_M2 = eff_x_M2 / se_x_M2 + + verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx + se_x_M1 = as.numeric(verify_pse_reg_M1[3]) + t_x_M1 = eff_x_M1 / se_x_M1 + + delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - est_eff))* + ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) + + fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, + # R2 for three reg models + eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, beta_x_M3_oster, + # std reg coef for X in three reg models + t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, + # t values for X in three reg models + + # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, + # reg coef for Z in three reg models + # NA, se_z_M2, se_z_M3, se_z_M3_oster, + # se for Z in three reg models +# NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, + # t for Z in three reg models, + NA, NA, eff_cv_M3, eff_cv_M3_oster, + # reg coef for CV in three reg models + NA, NA, se_cv_M3, se_cv_M3_oster, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3, + eff_cv_M3_oster / se_cv_M3_oster), + # t for CV in three reg models + nrow = 8, ncol = 4, byrow = T) + + rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", + # "coef_Z", "SE_Z", "t_Z", + "coef_CV", "SE_CV", "t_CV") + + colnames(fTable) <- c("M1:X", "M2:X,Z", + "M3(delta_exact):X,Z,CV", + "M3(delta*):X,Z,CV") + + ## figure + figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", + "Intermediate(M2)", eff_x_M2, R2, "exact", + "Final(M3)", eff_x_M3, FR2max, "exact", + "Intermediate(M2)", eff_x_M2, R2, "star", + "Final(M3)", eff_x_M3_oster, FR2max, "star"), + nrow = 5, ncol = 4, byrow = T) + colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") + + figTable <- as.data.frame(figTable) + figTable$ModelLabel <- as.character(figTable$ModelLabel) + figTable$ModelLabel <- factor(figTable$ModelLabel, + levels = c("Baseline(M1)", + "Intermediate(M2)", + "Final(M3)")) + figTable$cat <- as.character(figTable$cat) + figTable$cat <- factor(figTable$cat, + levels = c("exact", + "star")) + figTable$coef_X <- as.numeric(figTable$coef_X) + figTable$R2 <- as.numeric(figTable$R2) + + scale = 1/(round(max(figTable$coef_X)*10)/10) + + fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + + ggplot2::geom_point( + ggplot2::aes( + y = coef_X, + group = cat, + shape = cat + ), + color = "blue", + size = 3 + ) + + ggplot2::scale_shape_manual(values = c(16, 1)) + + ggplot2::geom_point( + ggplot2::aes( + y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + + # scale_linetype_manual(values=c("solid", "dotted")) + + ggplot2::geom_line( + ggplot2::aes( + y = R2/scale, group = cat), + linetype = "solid", color = "#7CAE00") + + ggplot2::geom_line( + ggplot2::aes( + y = coef_X, group = cat, linetype = cat), color = "blue") + + ggplot2::scale_y_continuous( + # Features of the first axis + name = "Coeffcient (X)", + # Add a second axis and specify its features + sec.axis = ggplot2::sec_axis(~.* scale, + name="R2")) + + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + legend.position = "none", + axis.line.y.right = ggplot2::element_line( + color = "#7CAE00"), + axis.title.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.text.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.line.y.left = ggplot2::element_line( + color = "blue"), + axis.title.y.left = ggplot2::element_text( + color = "blue"), + axis.text.y.left = ggplot2::element_text( + color = "blue"), + axis.line.x.bottom = ggplot2::element_line( + color = "black"), + axis.text.x.bottom = ggplot2::element_text( + color = "black")) + + ############################################## + ######### conditional RIR #################### + + # calculating critical r + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 2) * -1 + } else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) + } + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) + + # final solutions + cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / + (R2 - ryz^2 + ryz^2 * critical_r^2) + cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs + + cond_RIRpi_null <- 1 - sqrt(critical_r^2 / + (R2 - ryz^2 + ryz^2 * critical_r^2)) + cond_RIR_null <- cond_RIRpi_null * n_obs + + cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / + (R2 - ryz^2)) + cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs + + ############################################## + ######### output ############################# + + if (to_return == "raw_output") { + if (negest == 1) { + cat("Using the absolute value of the estimated + effect, results can be interpreted by symmetry.") + cat("\n") + } + output <- list("delta*" = delta_star, + "delta*restricted" = delta_star_restricted, + "delta_exact" = delta_exact, + "delta_pctbias" = delta_pctbias, + #"cov_oster" = cov_oster, + #"cov_exact" = cov_exact, + "cor_oster" = cor_oster, + "cor_exact" = cor_exact, + "var(Y)" = sdy^2, + "var(X)" = sdx^2, + #"var(Z)" = sdz^2, + "var(CV)" = sdcv^2, + "Table" = fTable, + "Figure" = fig, + "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, + "conditional RIR (fixed y)" = cond_RIR_fixedY, + "conditional RIR pi (null)" = cond_RIRpi_null, + "conditional RIR (null)" = cond_RIR_null, + "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, + "conditional RIR (rxyGz)" = cond_RIR_rxyz) + return(output) } - cat(sprintf("delta* is %.3f (assuming no covariates in the baseline model M1), the exact delta is %.3f, with a bias of %.3f%%", delta_star, delta_exact, delta_pctbias)) - cat("\n") - cat(sprintf("With delta*, the coefficient in the final model will be %.3f. With the exact delta, the coefficient will be %.3f.", - eff_x_M3_oster,eff_x_M3)) - cat("\n") - cat("Use to_return = raw_ouput to see more specific results and graphic presentation of the result.") - cat("\n") - cat("\n") - cat("This function also calculates conditional RIR that invalidates the statistical inference.") - cat("\n") - cat("If the replacement cases have a fixed value, then RIR =", cond_RIR_fixedY) - cat("\n") - cat("If the replacement cases follow a null distribution, then RIR =", cond_RIR_null) - cat("\n") - cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", cond_RIR_rxyz) - cat("\n") - cat("\n") - } - -} \ No newline at end of file + + if (to_return == "print") { + cat("This function calculates delta* and the exact value of delta.") + cat("\n") + if (negest == 1) { + cat("Using the absolute value of the estimated effect, + results can be interpreted by symmetry.") + cat("\n") + } + cat(sprintf("delta* is %.3f (assuming no covariates in the baseline + model M1), the exact delta is %.3f, with a bias of %.3f%%", + delta_star, delta_exact, delta_pctbias)) + cat("\n") + cat(sprintf("With delta*, the coefficient in the final model will be + %.3f. With the exact delta, the coefficient will be %.3f.", + eff_x_M3_oster,eff_x_M3)) + cat("\n") + cat("Use to_return = raw_ouput to see more specific results and graphic + presentation of the result.") + cat("\n") + cat("\n") + cat("This function also calculates conditional RIR that invalidates the + statistical inference.") + cat("\n") + cat("If the replacement cases have a fixed value, then RIR =", + cond_RIR_fixedY) + cat("\n") + cat("If the replacement cases follow a null distribution, then RIR =", + cond_RIR_null) + cat("\n") + cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", + cond_RIR_rxyz) + cat("\n") + cat("\n") + } + +} From b5e431d46b063f754546ad0254207d6a47392bdc Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:34:16 -0500 Subject: [PATCH 15/26] Update test_pse.R Manual modification --- R/test_pse.R | 94 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 31 deletions(-) diff --git a/R/test_pse.R b/R/test_pse.R index 0bdb147f..ba8ea77b 100644 --- a/R/test_pse.R +++ b/R/test_pse.R @@ -17,13 +17,21 @@ test_pse <- function(est_eff, df = n_obs - n_covariates - 3 ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x + needs to be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! Standard deviation of y + needs to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.")} if (!(0 < R2)) {stop("Did not run! R2 needs to be greater than zero.")} if (!(R2 < 1)) {stop("Did not run! R2 needs to be less than one.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! Entered values produced Rxz^2 <=0, + consider adding more significant digits to your entered values.")} ## now standardize @@ -51,19 +59,24 @@ test_pse <- function(est_eff, rxcvGz = as.numeric(Gz_pse[1]) rycvGz = as.numeric(Gz_pse[2]) - # convert conditional correlations to unconditional correlations to be used in new regression + # convert conditional correlations to unconditional + # correlations to be used in new regression rxcv = rxcvGz * sqrt((1 - rcvz^2) * (1 - rxz^2)) + rxz * rcvz rycv = rycvGz * sqrt((1 - rcvz^2) * (1 - rzy^2)) + rzy * rcvz - verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, rzy, rycv, rxcv, rcvz) - verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, rzcv, sdy, sdx) + verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, + rzy, rycv, rxcv, rcvz) + verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, + rzcv, sdy, sdx) cov_pse = verify_pse_reg_M3[[11]] # prepare some other values in the final Table (long output) R2_M3 = as.numeric(verify_pse_reg_M3[1]) - eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) # should be equivalent or very close to eff_thr + eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) + # should be equivalent or very close to eff_thr se_x_M3 = as.numeric(verify_pse_reg_M3[3]) - beta_x_M3 = as.numeric(verify_pse_reg_M3[9]) # should be equivalent or very close to thr + beta_x_M3 = as.numeric(verify_pse_reg_M3[9]) + # should be equivalent or very close to thr t_x_M3 = eff_x_M3 / se_x_M3 eff_z_M3 = as.numeric(verify_pse_reg_M3[4]) se_z_M3 = as.numeric(verify_pse_reg_M3[5]) @@ -72,30 +85,44 @@ test_pse <- function(est_eff, verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy, rxz, rzy) R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) se_z_M2 = as.numeric(verify_pse_reg_M2[5]) t_x_M2 = eff_x_M2 / se_x_M2 verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx se_x_M1 = as.numeric(verify_pse_reg_M1[3]) t_x_M1 = eff_x_M1 / se_x_M1 fTable <- matrix(c(R2_M1, R2_M2, R2_M3, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, # t values for X in three reg models - NA, eff_z_M2, eff_z_M3, # reg coef for Z in three reg models - NA, se_z_M2, se_z_M3, # se for Z in three reg models - NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, # t for Z in three reg models, - NA, NA, eff_cv_M3, # reg coef for CV in three reg models - NA, NA, se_cv_M3, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3), # t for CV in three reg models - nrow = 11, ncol = 3, byrow = T) + eff_x_M1, eff_x_M2, eff_x_M3, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, + # std reg coef for X in three reg models + t_x_M1, t_x_M2, t_x_M3, + # t values for X in three reg models + NA, eff_z_M2, eff_z_M3, + # reg coef for Z in three reg models + NA, se_z_M2, se_z_M3, + # se for Z in three reg models + NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, + # t for Z in three reg models, + NA, NA, eff_cv_M3, + # reg coef for CV in three reg models + NA, NA, se_cv_M3, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3), + # t for CV in three reg models + nrow = 11, ncol = 3, byrow = T) rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", "coef_Z", "SE_Z", "t_Z", @@ -114,17 +141,22 @@ test_pse <- function(est_eff, } if (to_return == "print") { - cat("This function calculates the conditions that set the estimated effect approximately equal to the threshold while preserving the standard error.") + cat("This function calculates the conditions that set the + estimated effect approximately equal to the threshold + while preserving the standard error.") cat("\n") - cat(sprintf("The correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcv, rycv)) + cat(sprintf("The correlation between X and CV is %.3f, + and the correlation between Y and CV is %.3f.", rxcv, rycv)) cat("\n") - cat(sprintf("Conditional on the covariates, the correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcvGz, rycvGz)) + cat(sprintf("Conditional on the covariates, the correlation between X + and CV is %.3f, and the correlation between Y and CV + is %.3f.", rxcvGz, rycvGz)) cat("\n") - cat(sprintf("Including such CV, the coefficient changes to %.3f, and standard error is %.3f.", eff_x_M3, se_x_M3)) + cat(sprintf("Including such CV, the coefficient changes to %.3f, + and standard error is %.3f.", eff_x_M3, se_x_M3)) cat("\n") cat("Use to_return = raw_ouput to see more specific results.") } - + } - \ No newline at end of file From 91b62745d53fea1e67b7663dd3599803430db5b2 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 10:40:06 -0500 Subject: [PATCH 16/26] Update test_sensitivity.R Manual modification --- R/test_sensitivity.R | 261 +++++++++++++++++++++++-------------------- 1 file changed, 142 insertions(+), 119 deletions(-) diff --git a/R/test_sensitivity.R b/R/test_sensitivity.R index 8a160d30..35150591 100644 --- a/R/test_sensitivity.R +++ b/R/test_sensitivity.R @@ -1,25 +1,27 @@ # helpers for the core sensitivity analysis function create_konfound_class <- function(x) { - structure(x, class = "konfound") + structure(x, class = "konfound") } #' Concise summary of konfound output -#' @details Prints a concise summary of konfound output with multiple types of data specified in the to_return argument +#' @details Prints a concise summary of konfound output with multiple types +#' of data specified in the to_return argument #' @param object A `konfound` object #' @param ... Additional arguments #' @export summary.konfound <- function(object, ...) { - cat("Created", length(object), "forms of output. To access type: \n") - cat("\n") - - for (name in names(object)) { - cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") - } + cat("Created", length(object), "forms of output. To access type: \n") + cat("\n") + + for (name in names(object)) { + cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") + } } -# Main function to test sensitivity to be wrapped with pkonfound(), konfound(), and mkonfound() +# Main function to test sensitivity to be wrapped with pkonfound(), +# konfound(), and mkonfound() test_sensitivity <- function(est_eff, std_err, @@ -32,114 +34,135 @@ test_sensitivity <- function(est_eff, to_return, model_object, tested_variable) { - if (nu != 0) warning("You entered a non-zero null hypothesis about an effect; this is being interpreted in terms of a partial correlation. Sampling variability is not accounted for.") - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} - - # calculating statistics used in every case - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } - else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - beta_threshold <- critical_t * std_err - # dealing with cases where hypotheses other than whether est_eff differs from 0 - if (nu != 0) { - est_eff <- abs(est_eff - nu) - } else { - est_eff <- est_eff - 0 - } # this is just to make what this is doing evident - - # for replacement of cases approach - - # calculating percentage of effect and number of observations to sustain or invalidate inference - if (abs(est_eff) > abs(beta_threshold)) { - bias <- 100 * (1 - (beta_threshold / est_eff)) - recase <- round(n_obs * (bias / 100)) - } - else if (abs(est_eff) < abs(beta_threshold)) { - sustain <- 100 * (1 - (est_eff / beta_threshold)) - recase <- round(n_obs * (sustain / 100)) - } - else if (est_eff == beta_threshold) { - stop("The coefficient is exactly equal to the threshold.") - } - - # for correlation-based approach - - # transforming t into r - obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + ((est_eff / std_err)^2))) - # finding critical r - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) - # calculating threshold - if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { - mp <- -1 - } else { - mp <- 1 - } - # calculating impact of the confounding variable - itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) - # finding correlation of confound to invalidate / sustain inference - r_con <- round(sqrt(abs(itcv)), 3) - - # if (component_correlations == FALSE){ - # rsq <- # has to come from some kind of model object - # varY <- # has to come from some kind of model object - # varX <- # has to come from some kind of model object - # sdX <- # has to come from some kind of model object - # - # rsqYZ = (((obs_r ^ 2) - Rsq) / ((obs_r ^ 2) - 1)) - # - # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) - # - # r_ycv = r_con * sqrt(1 - rsqYZ) - # r_xcv = r_con * sqrt(1 - rsqXZ) - # # before conditioning on observed covariates - # } - - # output dispatch - - if (length(to_return) > 1) { - to_return <- to_return[!(to_return == "print")] - - konfound_output <- purrr::map( - to_return, - ~ test_sensitivity( - est_eff = est_eff, - std_err = std_err, - n_obs = n_obs, - n_covariates = n_covariates, - alpha = alpha, - tails = tails, - nu = nu, - to_return = . - ) - ) - konfound_output <- create_konfound_class(konfound_output) - names(konfound_output) <- to_return - output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) - - cat("\n") - message(paste("Print output created by default. Created", length(konfound_output), "other forms of output. Use list indexing or run summary() on the output to see how to access.")) - - return(konfound_output) - } - - else if (to_return == "raw_output") { - return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, recase, obs_r, critical_r, r_con, itcv)) - } else if (to_return == "thresh_plot") { # this still makes sense for NLMs (just not quite as accurate) - return(plot_threshold(beta_threshold = beta_threshold, est_eff = est_eff)) - } else if (to_return == "corr_plot") { - return(plot_correlation(r_con = r_con, obs_r = obs_r, critical_r = critical_r)) - } else if (to_return == "print") { - return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index)) - } else if (to_return == "table") { - return(output_table(model_object, tested_variable)) - } else { - stop("to_return must be set to 'raw_output', 'print', 'table', 'thresh_plot', or 'corr_plot' or some combination thereof") - } + if (nu != 0) warning("You entered a non-zero null hypothesis about an + effect;this is being interpreted in terms of a partial + correlation.Sampling variability is not accounted for.") + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to + the number of observations and covariates. Please specify a + less complex model to use KonFound-It.")} + + # calculating statistics used in every case + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 3) * -1 + } + else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + beta_threshold <- critical_t * std_err + # dealing with cases where hypotheses other than whether + # est_eff differs from 0 + if (nu != 0) { + est_eff <- abs(est_eff - nu) + } else { + est_eff <- est_eff - 0 + } # this is just to make what this is doing evident + + # for replacement of cases approach + + # calculating percentage of effect and number of observations to + # sustain or invalidate inference + if (abs(est_eff) > abs(beta_threshold)) { + bias <- 100 * (1 - (beta_threshold / est_eff)) + recase <- round(n_obs * (bias / 100)) + } + else if (abs(est_eff) < abs(beta_threshold)) { + sustain <- 100 * (1 - (est_eff / beta_threshold)) + recase <- round(n_obs * (sustain / 100)) + } + else if (est_eff == beta_threshold) { + stop("The coefficient is exactly equal to the threshold.") + } + + # for correlation-based approach + + # transforming t into r + obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + + ((est_eff / std_err)^2))) + # finding critical r + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) + # calculating threshold + if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { + mp <- -1 + } else { + mp <- 1 + } + # calculating impact of the confounding variable + itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) + # finding correlation of confound to invalidate / sustain inference + r_con <- round(sqrt(abs(itcv)), 3) + + # if (component_correlations == FALSE){ + # rsq <- # has to come from some kind of model object + # varY <- # has to come from some kind of model object + # varX <- # has to come from some kind of model object + # sdX <- # has to come from some kind of model object + # + # rsqYZ = (((obs_r ^ 2) - Rsq) / ((obs_r ^ 2) - 1)) + # + # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / + # (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) + # + # r_ycv = r_con * sqrt(1 - rsqYZ) + # r_xcv = r_con * sqrt(1 - rsqXZ) + # # before conditioning on observed covariates + # } + + # output dispatch + + if (length(to_return) > 1) { + to_return <- to_return[!(to_return == "print")] + + konfound_output <- purrr::map( + to_return, + ~ test_sensitivity( + est_eff = est_eff, + std_err = std_err, + n_obs = n_obs, + n_covariates = n_covariates, + alpha = alpha, + tails = tails, + nu = nu, + to_return = . + ) + ) + konfound_output <- create_konfound_class(konfound_output) + names(konfound_output) <- to_return + output_print(est_eff, beta_threshold, bias, sustain, nu, + recase, obs_r, critical_r, r_con, itcv, alpha, index) + + cat("\n") + message(paste( + "Print output created by default. Created", length(konfound_output), + "other forms of output. Use list indexing or run summary() on the + output to see how to access.")) + + return(konfound_output) + } + + else if (to_return == "raw_output") { + return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, + recase, obs_r, critical_r, r_con, itcv)) + } else if (to_return == "thresh_plot") { + # this still makes sense for NLMs (just not quite as accurate) + return(plot_threshold(beta_threshold = beta_threshold, + est_eff = est_eff)) + } else if (to_return == "corr_plot") { + return(plot_correlation(r_con = r_con, obs_r = obs_r, + critical_r = critical_r)) + } else if (to_return == "print") { + return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, + obs_r, critical_r, r_con, itcv, alpha, index)) + } else if (to_return == "table") { + return(output_table(model_object, tested_variable)) + } else { + stop("to_return must be set to 'raw_output', 'print', 'table', + 'thresh_plot', or 'corr_plot' or some combination thereof") + } } From c049eef0b667b57a59df1369884fc9888c6bdfb3 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:03:53 -0500 Subject: [PATCH 17/26] Update test_sensitivity_ln.R Manual modification --- R/test_sensitivity_ln.R | 688 +++++++++++++++++++++------------------- 1 file changed, 370 insertions(+), 318 deletions(-) diff --git a/R/test_sensitivity_ln.R b/R/test_sensitivity_ln.R index cca907ae..4d13bd05 100644 --- a/R/test_sensitivity_ln.R +++ b/R/test_sensitivity_ln.R @@ -1,5 +1,5 @@ -# Main function to test sensitivity for non-linear models to be wrapped with pkonfound(), konfound(), and mkonfound() - +# Main function to test sensitivity for non-linear models to +# be wrapped with pkonfound(), konfound(), and mkonfound() test_sensitivity_ln <- function(est_eff, std_err, n_obs, @@ -13,347 +13,399 @@ test_sensitivity_ln <- function(est_eff, to_return, model_object, tested_variable) { - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} - - if (est_eff < 0) { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } else { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - # stop message - if (n_obs <= 0 || n_treat <= 0) { - stop("Please enter positive integers for sample size and number of treatment group cases.") - } - if (n_obs <= n_treat) { - stop("The total sample size should be larger than the number of treatment group cases.") - } - - odds_ratio <- exp(est_eff) - - # updated approach to deal with imaginary - minse <- sqrt((4 * n_obs + - sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * - ((4 + 4 * odds_ratio^2) / odds_ratio - 7.999999)))/ - (2 * n_treat * (n_obs - n_treat))) - # check if the implied table solution may contain imaginary numbers - changeSE <- F - if (std_err < minse) { - haveimaginary <- T - changeSE <- T - user_std_err <- std_err - std_err <- minse - } + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.")} - # n_treat is the number of observations in the treatment group (c+d) - # n_cnt is the number of observations in the control group (a+b) - n_cnt <- n_obs - n_treat - # t_ob is the t value calculated based on observed estimate and standard error - t_ob <- est_eff / std_err - # invalidate_ob is true - the observed result is significant - we are invalidating the observed result - invalidate_ob <- isinvalidate(thr_t, t_ob) - # dcroddsratio_ob is true - our goal is to decrease the odds ratio - dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) - - # previous approach to deal with imaginary - # to record the original treatment cases in case we need to adjust it - # user_ntrm <- n_treat - # check if the implied table solution may contain imaginary numbers - # haveimaginary <- F - # changepi <- F - # set the default value for whether we need and can adjust pi (ratio of treatment cases) - # to remove the imaginary part - # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * - # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + n_treat^2 * std_err^4)) - # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * (4 - 0.25 * n_obs * std_err^2)) - # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) - # if (keyimagin > 0) { + if (est_eff < 0) { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 + } else { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + # stop message + if (n_obs <= 0 || n_treat <= 0) { + stop("Please enter positive integers for sample size and number of + treatment group cases.") + } + if (n_obs <= n_treat) { + stop("The total sample size should be larger than the number of + treatment group cases.") + } + + odds_ratio <- exp(est_eff) + + # updated approach to deal with imaginary + minse <- sqrt((4 * n_obs + + sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * + ((4 + 4 * odds_ratio^2) / + odds_ratio - 7.999999)))/ + (2 * n_treat * (n_obs - n_treat))) + # check if the implied table solution may contain imaginary numbers + changeSE <- F + if (std_err < minse) { + haveimaginary <- T + changeSE <- T + user_std_err <- std_err + std_err <- minse + } + + # n_treat is the number of observations in the treatment group (c+d) + # n_cnt is the number of observations in the control group (a+b) + n_cnt <- n_obs - n_treat + #t_ob is the t value calculated based on observed estimate and standard error + t_ob <- est_eff / std_err + # invalidate_ob is true - the observed result is significant + # - we are invalidating the observed result + invalidate_ob <- isinvalidate(thr_t, t_ob) + # dcroddsratio_ob is true - our goal is to decrease the odds ratio + dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) + + # previous approach to deal with imaginary + # to record the original treatment cases in case we need to adjust it + # user_ntrm <- n_treat + # check if the implied table solution may contain imaginary numbers + # haveimaginary <- F + # changepi <- F + # set the default value for whether we need and can adjust pi + # (ratio of treatment cases) + # to remove the imaginary part + # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * + # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + # + n_treat^2 * std_err^4)) + # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * + # (4 - 0.25 * n_obs * std_err^2)) + # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) + # if (keyimagin > 0) { # haveimaginary <- T # if (minimgain <= 0 && keyx1 > 0) { - # changepi <- T - # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) - # n_cnt <- n_obs - n_treat - # } else { - # stop("Cannot generate a usable contingency table; Please consider using the Pearson's chi-squared approach (under development).") - # } - #} - - # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table - a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, n_treat) - b1 <- n_cnt - a1 - c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) - d1 <- n_treat - c1 - - # a2, b2, c2, d2 are the second solution for the 4 cells in the contingency table - a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) - b2 <- n_cnt - a2 - c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) - d2 <- n_treat - c2 - - # Differences between these two sets of solutions: - ### a1 c1 are small while a2 c2 are large - ### b1 d1 are large while b2 d2 are small - ### the goal is to get fewest swithes to invalidate the inference - ### remove the solution if one cell has fewerer than 5 cases or negative cells or nan cells - check1 <- check2 <- TRUE - if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) - || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { - check1 <- FALSE - } - - if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) - || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { - check2 <- FALSE - } - - if (check1) { - table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) - solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - if (check2) { - table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) - solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - if (!check1 && !check2) { - stop("Cannot generate a usable contingency table!") - } - - # get the number of switches for solutions that satisfy the requirements - if (check1 && check2) { - final_switch1 <- solution1$final_switch - final_switch2 <- solution2$final_switch - if (final_switch1 < final_switch2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } else { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + # changepi <- T + # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) + # n_cnt <- n_obs - n_treat + # } else { + # stop("Cannot generate a usable contingency table; Please consider + # using the Pearson's chi-squared approach (under development).") + # } + #} + + # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table + a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, n_treat) + b1 <- n_cnt - a1 + c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) + d1 <- n_treat - c1 + + # a2, b2, c2, d2 are the second solution for the 4 cells in the + # contingency table + a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) + b2 <- n_cnt - a2 + c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) + d2 <- n_treat - c2 + + # Differences between these two sets of solutions: + ### a1 c1 are small while a2 c2 are large + ### b1 d1 are large while b2 d2 are small + ### the goal is to get fewest swithes to invalidate the inference + ### remove the solution if one cell has fewerer than 5 cases + ### or negative cells or nan cells + check1 <- check2 <- TRUE + if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && + n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) + || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { + check1 <- FALSE + } + + if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && + n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) + || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { + check2 <- FALSE + } + + if (check1) { + table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) + solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } + if (check2) { + table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) + solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + if (!check1 && !check2) { + stop("Cannot generate a usable contingency table!") + } + + # get the number of switches for solutions that satisfy the requirements + if (check1 && check2) { + final_switch1 <- solution1$final_switch + final_switch2 <- solution2$final_switch + if (final_switch1 < final_switch2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } else { + final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + } + + if (check1 && !check2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) } - } - - if (check1 && !check2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - - if (!check1 && check2) { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - - final <- final_solution$final_switch - a <- final_solution$table_start[1,1] - b <- final_solution$table_start[1,2] - c <- final_solution$table_start[2,1] - d <- final_solution$table_start[2,2] - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (final_solution$needtworows) { - final_extra <- final_solution$final_extra + + if (!check1 && check2) { + final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + + final <- final_solution$final_switch + a <- final_solution$table_start[1,1] + b <- final_solution$table_start[1,2] + c <- final_solution$table_start[2,1] + d <- final_solution$table_start[2,2] + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure to control success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && !dcroddsratio_ob) { - transferway_extra <- "control success to control failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (invalidate_ob) { - change <- "To invalidate the inference," - } else { - if (est_eff >= 0) { - change <- "To sustain an inference for a positive treatment effect," - } else { - change <- "To sustain an inference for a negative treatment effect," + + if (final_solution$needtworows) { + final_extra <- final_solution$final_extra + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - } - - if (!final_solution$needtworows & final_solution$final_switch > 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + + if (invalidate_ob) { + change <- "To invalidate the inference," } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + if (est_eff >= 0) { + change <- "To sustain an inference for a positive treatment effect," + } else { + change <- "To sustain an inference for a negative treatment effect," + } } - - conclusion1b <- paste( - sprintf("This is equivalent to transferring %d", final_solution$final_switch), - c("cases from"), transferway) - - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - } else if (!final_solution$needtworows & final_solution$final_switch == 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + if (!final_solution$needtworows & final_solution$final_switch > 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the + control group applies (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the entire + sample applies (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("cases from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." + + } else if (!final_solution$needtworows & final_solution$final_switch == 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf("with cases for which the probability of + failure in the control group applies + (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf("with cases for which the probability of + failure in the entire sample applies + (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("case from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." + } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + conclusion1 <- paste( + change, c( + "only transferring cases from"), transferway, "is not enough.") + + conclusion1b <- paste(sprintf("We also need to transfer %d cases from", + final_solution$final_extra), + transferway_extra, c("as shown, from the + User-entered Table to the + Transfer Table.")) + + conclusion1c <- paste(sprintf("This means we need to replace %d of", + RIR), RIRway, + sprintf("with null hypothesis cases; and + replace %d", RIR_extra), RIRway_extra, + c("with null hypothesis cases to change + the inference.") + ) } - conclusion1b <- paste( - sprintf("This is equivalent to transferring %d", final_solution$final_switch), - c("case from"), transferway) - - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - - } else { - conclusion1 <- paste( - change, c("only transferring cases from"), transferway, "is not enough.") - - conclusion1b <- paste(sprintf("We also need to transfer %d cases from", final_solution$final_extra), - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + conclusion2 <- sprintf( + "For the Implied Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_start, final_solution$std_err_start, + final_solution$t_start + ) - conclusion1c <- paste(sprintf("This means we need to replace %d of", RIR), RIRway, - sprintf("with null hypothesis cases; and replace %d", RIR_extra), RIRway_extra, - c("with null hypothesis cases to change the inference.") + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_final, final_solution$std_err_final, + final_solution$t_final ) - } - - conclusion2 <- sprintf( - "For the Implied Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - final_solution$est_eff_start, final_solution$std_err_start, final_solution$t_start - ) - - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - final_solution$est_eff_final, final_solution$std_err_final, final_solution$t_final - ) - - notice <- "Note: Values have been rounded to the nearest integer." - noticeb <- "This may cause a little change to the estimated effect for the Implied Table." - - if (changeSE) { - notice_SE <- sprintf( - "In order to generate a usable implied contingency table, we increased the standard error to %.3f (the original one is %.3f).", - std_err, user_std_err) - } - - if (final_solution$needtworows) { - total_switch <- final_solution$final_switch+final_solution$final_extra - total_RIR <- RIR + RIR_extra - } else { - total_switch <- final_solution$final_switch - total_RIR <- RIR - } - - # result <- list(conclusion1, - # Implied_Table = final_solution$table_start, notice, Transfer_Table = final_solution$table_final, - # conclusion2, conclusion3, - # total_RIR = total_RIR, total_switch = total_switch - # ) - - # output dispatch - if (to_return == "raw_output") { + + notice <- "Note: Values have been rounded to the nearest integer." + noticeb <- + "This may cause a little change to the estimated + effect for the Implied Table." if (changeSE) { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR, - notice_SE) + notice_SE <- sprintf( + "In order to generate a usable implied contingency table, we + increased the standard error to %.3f (the original one is %.3f).", + std_err, user_std_err) + } + + if (final_solution$needtworows) { + total_switch <- final_solution$final_switch+final_solution$final_extra + total_RIR <- RIR + RIR_extra } else { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR) + total_switch <- final_solution$final_switch + total_RIR <- RIR } - - return(result) - } else if (to_return == "print") { + # result <- list(conclusion1, + # Implied_Table = final_solution$table_start, notice, + # Transfer_Table = final_solution$table_final, + # conclusion2, conclusion3, + # total_RIR = total_RIR, total_switch = total_switch + # ) - # cat(crayon::bold("Background Information:")) - # cat("\n") - # cat(info1) - # cat("\n") - # cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(final_solution$table_start) - cat("\n") - if (changeSE) { - cat(notice_SE) - cat("\n") - } - cat(notice) - cat(noticeb) - cat("\n") - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1a) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(final_solution$table_final) - cat("\n") - cat(conclusion2) - cat("\n") - cat(conclusion3) - cat("\n") - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") - - } + # output dispatch + if (to_return == "raw_output") { + + if (changeSE) { + result <- list(conclusion1, + conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR, + notice_SE) + } else { + result <- list(conclusion1, + conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR) + } + + return(result) + + } else if (to_return == "print") { + + # cat(crayon::bold("Background Information:")) + # cat("\n") + # cat(info1) + # cat("\n") + # cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(final_solution$table_start) + cat("\n") + if (changeSE) { + cat(notice_SE) + cat("\n") + } + cat(notice) + cat(noticeb) + cat("\n") + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1a) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(final_solution$table_final) + cat("\n") + cat(conclusion2) + cat("\n") + cat(conclusion3) + cat("\n") + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } } From 6991bf852b7e122302cbdfaf6d56e4ef1e51f5a9 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:45:59 -0500 Subject: [PATCH 18/26] Update tkonfound.R Manual modification --- R/tkonfound.R | 482 ++++++++++++++++++++++++++------------------------ 1 file changed, 252 insertions(+), 230 deletions(-) diff --git a/R/tkonfound.R b/R/tkonfound.R index 1433418f..95bac426 100644 --- a/R/tkonfound.R +++ b/R/tkonfound.R @@ -4,254 +4,276 @@ tkonfound <- function(a, b, c, d, test = "fisher", replace = "control", to_return = to_return){ - # a <- 35 - # b <- 17 - # c <- 17 - # d <- 38 - # alpha <- 0.05 - # switch_trm <- T - # test <- "fisher" - - # stop message - if (a < 0 || b < 0 || c < 0 || d < 0) { - stop("Please enter non-negative integers for each cell.") - } - - if (a != as.integer(a) || b != as.integer(b) || c != as.integer(c) || d != as.integer(d)) { - stop("Please enter non-negative integers for each cell.") - } - - # use fisher if any of the cell is smaller than 5 - if (a < 5 || b < 5 || c < 5 || d < 5){ - test <- "fisher" - } - - # odds_ratio <- a*d/(b*c) - n_cnt <- a+b - n_trm <- c+d - n_obs <- n_cnt + n_trm - # est <- log(odds_ratio) - - # this is the 2 by 2 table we start with - table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) - - if (test == "fisher") { - p_ob <- fisher_p(a, b, c, d) - fisher_ob <- fisher_oddsratio(a, b, c, d) - } - if (test == "chisq") { - p_ob <- chisq_p(a, b, c, d) - chisq_ob <- chisq_value(a, b, c, d) - } - - # get solution - if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) - chisq_final <- solution$chisq_final - } - - if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) - fisher_final <- solution$fisher_final - } - - table_final <- solution$Transfer_Table - table_start <- table_ob - dcroddsratio_ob <- solution$dcroddsratio_ob - allnotenough <- solution$needtworows - final <- solution$final_switch - p_final <- solution$p_final - taylor_pred <- solution$taylor_pred - perc_bias_pred <- solution$perc_bias_pred - final_extra <- solution$final_extra - total_switch <- solution$total_switch - - ### add column and row names to contingency tables - rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") - colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success" - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success" - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (allnotenough) { + # a <- 35 + # b <- 17 + # c <- 17 + # d <- 38 + # alpha <- 0.05 + # switch_trm <- T + # test <- "fisher" + + # stop message + if (a < 0 || b < 0 || c < 0 || d < 0) { + stop("Please enter non-negative integers for each cell.") + } + + if (a != as.integer(a) || b != as.integer(b) || + c != as.integer(c) || d != as.integer(d)) { + stop("Please enter non-negative integers for each cell.") + } + + # use fisher if any of the cell is smaller than 5 + if (a < 5 || b < 5 || c < 5 || d < 5){ + test <- "fisher" + } + + # odds_ratio <- a*d/(b*c) + n_cnt <- a+b + n_trm <- c+d + n_obs <- n_cnt + n_trm + # est <- log(odds_ratio) + + # this is the 2 by 2 table we start with + table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) + + if (test == "fisher") { + p_ob <- fisher_p(a, b, c, d) + fisher_ob <- fisher_oddsratio(a, b, c, d) + } + if (test == "chisq") { + p_ob <- chisq_p(a, b, c, d) + chisq_ob <- chisq_value(a, b, c, d) + } + + # get solution + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) + chisq_final <- solution$chisq_final + } + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) + fisher_final <- solution$fisher_final + } + + table_final <- solution$Transfer_Table + table_start <- table_ob + dcroddsratio_ob <- solution$dcroddsratio_ob + allnotenough <- solution$needtworows + final <- solution$final_switch + p_final <- solution$p_final + taylor_pred <- solution$taylor_pred + perc_bias_pred <- solution$perc_bias_pred + final_extra <- solution$final_extra + total_switch <- solution$total_switch + + ### add column and row names to contingency tables + rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") + colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure to control success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && !dcroddsratio_ob) { - transferway_extra <- "control success to control failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (p_ob < alpha) { - change <- "To invalidate the inference, " - } else { - change <- "To sustain an inference, " - } - - if (!allnotenough & final > 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) - } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). ", RIR)) + if (allnotenough) { + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - conclusion1c <- paste0( - sprintf("This is equivalent to transferring %d", final), - " cases from ", transferway, "." - ) - } - - if (!allnotenough & final == 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) + if (p_ob < alpha) { + change <- "To invalidate the inference, " } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). ", RIR)) - } + change <- "To sustain an inference, " + } - conclusion1c <- paste0( - sprintf("This is equivalent to transferring %d", final), - " case from ", transferway, ".") - } - - if (allnotenough){ - conclusion1 <- paste( - change, c("only transferring cases from" ), transferway, - sprintf(" is not enough. We also need to transfer %d cases from ", final_extra)) + if (!allnotenough & final > 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the control group applies (RIR = %d). ", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the entire group applies (RIR = %d). ", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " cases from ", transferway, "." + ) + } - conclusion1b <- paste0( - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + if (!allnotenough & final == 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + control group applies (RIR = %d). ", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + entire group applies (RIR = %d). ", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " case from ", transferway, ".") + } - conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", RIR), RIRway, - sprintf( "with null hypothesis cases; and replace %d ", RIR_extra), RIRway_extra, - c(" with null hypothesis cases to change the inference.")) - } - - if (test == "chisq"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_final, p_final) - } - - if (test == "fisher"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_final, p_final) - } - - info1 <- "This function calculates the number of cases that would have to be replaced" - info2 <- "with no effect cases (RIR) to invalidate an inference made about the association" - info3 <- "between the rows and columns in a 2x2 table." - info4 <- "One can also interpret this as switches from one cell to another, such as from" - info5 <- "the treatment success cell to the treatment failure cell." - - if (to_return == "raw_output") { + if (allnotenough){ + conclusion1 <- paste( + change, c("only transferring cases from" ), transferway, + sprintf(" is not enough. We also need to transfer %d cases from ", + final_extra)) + + conclusion1b <- paste0( + transferway_extra, c("as shown, from the User-entered Table + to the Transfer Table.")) + + conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", + RIR), RIRway, + sprintf( "with null hypothesis cases; and + replace %d ", RIR_extra), RIRway_extra, + c(" with null hypothesis cases to change + the inference.")) + } - result <- list(info1, - info2, - conclusion1, - conclusion1b, - conclusion1c, - User_enter_value = table_start, - Transfer_Table = table_final, - conclusion2, - conclusion3, - RIR = RIR) + if (test == "chisq"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_final, p_final) + } - return(result) + if (test == "fisher"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have an estimated odds ratio + of %.3f, with p-value of %.3f:", fisher_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimated odds ratio of + %.3f, with p-value of %.3f:", fisher_final, p_final) + } - } else if (to_return == "print") { + info1 <- "This function calculates the number of cases that + would have to be replaced" + info2 <- "with no effect cases (RIR) to invalidate an inference + made about the association" + info3 <- "between the rows and columns in a 2x2 table." + info4 <- "One can also interpret this as switches from one cell + to another, such as from" + info5 <- "the treatment success cell to the treatment failure cell." - cat(crayon::bold("Background Information:")) - cat("\n") - cat(info1) - cat("\n") - cat(info2) - cat("\n") - cat(info3) - cat("\n") - cat(info4) - cat("\n") - cat(info5) - cat("\n") - cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat(conclusion2) - cat("\n") - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(table_start) - cat("\n") - cat("\n") - cat(conclusion3) - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(table_final) - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") + if (to_return == "raw_output") { + + result <- list(info1, + info2, + conclusion1, + conclusion1b, + conclusion1c, + User_enter_value = table_start, + Transfer_Table = table_final, + conclusion2, + conclusion3, + RIR = RIR) + + return(result) + + } else if (to_return == "print") { + + cat(crayon::bold("Background Information:")) + cat("\n") + cat(info1) + cat("\n") + cat(info2) + cat("\n") + cat(info3) + cat("\n") + cat(info4) + cat("\n") + cat(info5) + cat("\n") + cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat(conclusion2) + cat("\n") + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(table_start) + cat("\n") + cat("\n") + cat(conclusion3) + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(table_final) + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } - } - } - - - From 1b9157e9bf6ff0eea897705d2b3b8c3b0f687959 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:54:32 -0500 Subject: [PATCH 19/26] Update tkonfound_fig.R Manual modification --- R/tkonfound_fig.R | 698 +++++++++++++++++++++++++--------------------- 1 file changed, 382 insertions(+), 316 deletions(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 61c380e5..a91a11aa 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -1,339 +1,405 @@ -#' Draw figures for change in effect size as a function of switching or replacing outcomes -#' @description This function returns two plots for change in effect size as a function of switching or replacing outcomes, one for all possibilities (switching), another zoomed in the area for positive RIR -#' @param a cell is the number of cases in the control group showing unsuccessful results -#' @param b cell is the number of cases in the control group showing successful results -#' @param c cell is the number of cases in the treatment group showing unsuccessful results -#' @param d cell is the number of cases in the treatment group showing successful results -#' @param thr_p the p-value threshold used to evaluate statistical significance, with the default of 0.05 -#' @param switch_trm whether switching the two cells in the treatment row or the two cells in the control row, with the default of the treatment row +#' Draw figures for change in effect size as a function of switching +#' or replacing outcomes +#' @description This function returns two plots for change in effect size +#' as a function of switching or replacing outcomes, one for all +#' possibilities (switching), another zoomed in the area for positive RIR +#' @param a cell is the number of cases in the control group showing +#' unsuccessful results +#' @param b cell is the number of cases in the control group showing +#' successful results +#' @param c cell is the number of cases in the treatment group showing +#' unsuccessful results +#' @param d cell is the number of cases in the treatment group showing +#' successful results +#' @param thr_p the p-value threshold used to evaluate statistical +#' significance, with the default of 0.05 +#' @param switch_trm whether switching the two cells in the treatment row +#' or the two cells in the control row, with the default of the treatment row #' @param test whether using Fisher's Exact Test (default) or chi-square test -#' @param replace whether using entire sample or the control group to calculate the base rate, with the default of the control group +#' @param replace whether using entire sample or the control group to calculate +#' the base rate, with the default of the control group #' @importFrom stats chisq.test -#' @return prints 2 figures for how number of hypothetical cases switched changes the effect size +#' @return prints 2 figures for how number of hypothetical cases switched +#' changes the effect size #' @examples #' # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq", replace = "entire") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +#' test = "chisq") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +#' test = "chisq", replace = "entire") #' #' @export #' -tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, test = "fisher", replace = "control"){ - -n_obs <- a + b + c + d -###***generate the log odds for each step of switch -meta <- data.frame(matrix(ncol = 10, nrow = n_obs-3)) -colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", "logodds","cntrl_p","tr_p","pdif") -if (switch_trm == T) { - for (i in 1:(n_obs-3)){ - if (i <= a){ - #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) - meta$a[i] <- i - meta$b[i] <- a+b-i - meta$c[i] <- c+d-1 - meta$d[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(d-1+a-i) +tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, + test = "fisher", replace = "control"){ + + n_obs <- a + b + c + d + ###***generate the log odds for each step of switch + meta <- data.frame(matrix(ncol = 10, nrow = n_obs-3)) + colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", + "logodds","cntrl_p","tr_p","pdif") + if (switch_trm == T) { + for (i in 1:(n_obs-3)){ + if (i <= a){ + #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) + meta$a[i] <- i + meta$b[i] <- a+b-i + meta$c[i] <- c+d-1 + meta$d[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(d-1+a-i) + } + if (i>a & i<=a+d-1){ + # from table(a, b, c+d-1, 1) to table(a, b, c, d) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c+d-1-i+a + meta$d[i] <- 1+i-a + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+c-2){ + # from table(a, b, c, d) to table(a, b, 1, d+c-1) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c-(i-a-d+1) + meta$d[i] <- d+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+c-2 & i<=a+b+c+d-3){ + # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) + meta$a[i] <- a+(i-a-d-c+2) + meta$b[i] <- b-(i-a-d-c+2) + meta$c[i] <- 1 + meta$d[i] <- c+d-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a & i<=a+d-1){ - # from table(a, b, c+d-1, 1) to table(a, b, c, d) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c+d-1-i+a - meta$d[i] <- 1+i-a - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + if (switch_trm == F) { + for (i in 1:(n_obs-3)){ + if (i <= d){ + #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) + meta$d[i] <- i + meta$c[i] <- d+c-i + meta$b[i] <- b+a-1 + meta$a[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a-1+d-i) + } + if (i>d & i<=a+d-1){ + # from table(1, a+b-1, c, d) to table(a, b, c, d) + meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b+a-1-i+d + meta$a[i] <- 1+i-d + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+b-2){ + # from table(a, b, c, d) to table(a+b-1, 1, c, d) + meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b-(i-a-d+1) + meta$a[i] <- a+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+b-2 & i<=a+b+c+d-3){ + # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) + meta$d[i] <- d+(i-d-a-b+2) + meta$c[i] <- c-(i-a-d-b+2) + meta$b[i] <- 1 + meta$a[i] <- b+a-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a+d-1 & i<=a+d+c-2){ - # from table(a, b, c, d) to table(a, b, 1, d+c-1) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c-(i-a-d+1) - meta$d[i] <- d+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + meta$cntrl_p <- meta$b/(meta$a + meta$b) + meta$tr_p <- meta$d/(meta$c + meta$d) + meta$pdif <- meta$tr_p - meta$cntrl_p + + ###***find out significant thresholds + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) + meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) } - if (i>a+d+c-2 & i<=a+b+c+d-3){ - # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) - meta$a[i] <- a+(i-a-d-c+2) - meta$b[i] <- b-(i-a-d-c+2) - meta$c[i] <- 1 - meta$d[i] <- c+d-1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -if (switch_trm == F) { - for (i in 1:(n_obs-3)){ - if (i <= d){ - #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) - meta$d[i] <- i - meta$c[i] <- d+c-i - meta$b[i] <- b+a-1 - meta$a[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a-1+d-i) + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) + for (i in 1:(n_obs-3)){ + meta$logodds[i] <- log( + fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) + } } - if (i>d & i<=a+d-1){ - # from table(1, a+b-1, c, d) to table(a, b, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b+a-1-i+d - meta$a[i] <- 1+i-d - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + + table_final <- solution$Transfer_Table + dcroddsratio_ob <- solution$dcroddsratio_ob + + meta$sig <- ifelse(meta$a==table_final[1,1] & + meta$b==table_final[1,2] & meta$c==table_final[2,1] & + meta$d==table_final[2,2],1,0) + + if (meta[meta$sig==1,]$logodds > 0){ + if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio + posinsig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==posinsig,]$logodds+ + meta[meta$switch==(posinsig+1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+ + meta[meta$switch==(posinsig+1),]$pdif)/2 + zoom_lower <- -(posinsig + 2) + meta$sigpoint <- ifelse(meta$switch==(posinsig+1), + "positive","other") + } else {#from not sig to sig by increasing positive effect + possig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==possig,]$logodds+ + meta[meta$switch==(possig-1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+ + meta[meta$switch==(possig-1),]$pdif)/2 + zoom_lower <- -(possig + 2) + meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + } + # find out the row that is cloest to logodds of 0 but negative + temp1 <- meta[meta$logodds<0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_upper <- -(temp1$switch[j]-2) + meta$sigpoint <- ifelse(meta$switch==temp1$switch[j], + "negative",meta$sigpoint) } - if (i>a+d-1 & i<=a+d+b-2){ - # from table(a, b, c, d) to table(a+b-1, 1, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b-(i-a-d+1) - meta$a[i] <- a+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + + if (meta[meta$sig==1,]$logodds < 0){ + if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio + negsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==negsig,]$logodds+ + meta[meta$switch==(negsig+1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+ + meta[meta$switch==(negsig+1),]$pdif)/2 + zoom_upper <- -(negsig - 4) + meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") + } else { + # from sig to not sig by increasing odds ratio that is smaller than 1 + neginsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==neginsig,]$logodds+ + meta[meta$switch==(neginsig-1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+ + meta[meta$switch==(neginsig-1),]$pdif)/2 + zoom_upper <- -(neginsig - 2) + meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") + } + # find out the row that is cloest to logodds of 0 but positive + temp1 <- meta[meta$logodds>0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_lower <- -(temp1$switch[j]+2) + meta$sigpoint <- ifelse(meta$switch==temp1$switch[j], + "positive",meta$sigpoint) } - if (i>a+d+b-2 & i<=a+b+c+d-3){ - # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) - meta$d[i] <- d+(i-d-a-b+2) - meta$c[i] <- c-(i-a-d-b+2) - meta$b[i] <- 1 - meta$a[i] <- b+a-1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -meta$cntrl_p <- meta$b/(meta$a + meta$b) -meta$tr_p <- meta$d/(meta$c + meta$d) -meta$pdif <- meta$tr_p - meta$cntrl_p - -###***find out significant thresholds -if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) - meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) -} - -if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) - for (i in 1:(n_obs-3)){ - meta$logodds[i] <- log(fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) - } -} - -table_final <- solution$Transfer_Table -dcroddsratio_ob <- solution$dcroddsratio_ob - -meta$sig <- ifelse(meta$a==table_final[1,1] & meta$b==table_final[1,2] & meta$c==table_final[2,1] & meta$d==table_final[2,2], - 1,0) - -if (meta[meta$sig==1,]$logodds > 0){ - if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio - posinsig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==posinsig,]$logodds+meta[meta$switch==(posinsig+1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+meta[meta$switch==(posinsig+1),]$pdif)/2 - zoom_lower <- -(posinsig + 2) - meta$sigpoint <- ifelse(meta$switch==(posinsig+1),"positive","other") - } else {#from not sig to sig by increasing positive effect - possig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==possig,]$logodds+meta[meta$switch==(possig-1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+meta[meta$switch==(possig-1),]$pdif)/2 - zoom_lower <- -(possig + 2) - meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + + meta$current <- ifelse(meta$switch==0, "current", "other") + meta$currentlabel <- ifelse(meta$switch==0, "current", NA) + meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) + + if (switch_trm && dcroddsratio_ob) { + # transferway <- "treatment success to treatment failure," + meta$switch <- meta$switch*(-1) } - # find out the row that is cloest to logodds of 0 but negative - temp1 <- meta[meta$logodds<0,] - temp1 <- temp1[order(abs(temp1$logodds)),] - j <- 1 - if (test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + if (!switch_trm && dcroddsratio_ob) { + #transferway <- "control failure to control success," + meta$switch <- meta$switch*(-1) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + fillcol <-c("current"="white", + "positive"="green4","negative"="red","other"="white") + pointshape <- c("current"=15,"other"=21) + + if (switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) } - } - neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_upper <- -(temp1$switch[j]-2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"negative",meta$sigpoint) -} - -if (meta[meta$sig==1,]$logodds < 0){ - if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio - negsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==negsig,]$logodds+meta[meta$switch==(negsig+1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+meta[meta$switch==(negsig+1),]$pdif)/2 - zoom_upper <- -(negsig - 4) - meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") - } else { # from sig to not sig by increasing odds ratio that is smaller than 1 - neginsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==neginsig,]$logodds+meta[meta$switch==(neginsig-1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+meta[meta$switch==(neginsig-1),]$pdif)/2 - zoom_upper <- -(neginsig - 2) - meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") - } - # find out the row that is cloest to logodds of 0 but positive - temp1 <- meta[meta$logodds>0,] - temp1 <- temp1[order(abs(temp1$logodds)),] - j <- 1 - if (test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if (switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if (!switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_lower <- -(temp1$switch[j]+2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"positive",meta$sigpoint) -} - -meta$current <- ifelse(meta$switch==0, "current", "other") -meta$currentlabel <- ifelse(meta$switch==0, "current", NA) -meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) - -if (switch_trm && dcroddsratio_ob) { - # transferway <- "treatment success to treatment failure," - meta$switch <- meta$switch*(-1) -} - -if (!switch_trm && dcroddsratio_ob) { - #transferway <- "control failure to control success," - meta$switch <- meta$switch*(-1) -} - -fillcol <-c("current"="white","positive"="green4","negative"="red","other"="white") -pointshape <- c("current"=15,"other"=21) - -if (switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} -if (switch_trm && !dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && !dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} - -meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, ")", sep = "") - -fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ - ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1)+ - ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(meta[meta$switch==0,]$RIR, meta[meta$sigpoint=="negative",]$RIR, - meta[meta$sigpoint=="positive",]$RIR), - labels= c(meta[meta$switch==0,]$xaxis, meta[meta$sigpoint=="negative",]$xaxis, - meta[meta$sigpoint=="positive",]$xaxis)) + - ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), - #axis.text= ggplot2::element_text(size = 14), - panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] -zoom <- zoom[zoom$switch>=0,] - -if (switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=d,] -} -if (switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=c,] -} -if (!switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=a,] -} -if (!switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=b,] -} - -zoom$label <- ifelse(zoom$sigpoint=="positive", - paste("sig pos:RIR=", zoom[zoom$sigpoint=="positive",]$RIR),NA) -zoom$label <- ifelse(zoom$sigpoint=="negative", - paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative",]$RIR),zoom$label) -zoom$label <- ifelse(zoom$sigpoint=="current", - paste("current"),zoom$label) - -fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"), - size = 1)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(zoom$RIR[1], zoom$RIR[as.integer(length(zoom$RIR)/2)], - zoom$RIR[as.integer(length(zoom$RIR))]), - labels= c(zoom$xaxis[1], zoom$xaxis[as.integer(length(zoom$RIR)/2)], - zoom$xaxis[as.integer(length(zoom$RIR))])) + - ggplot2::theme(panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1) -} - -if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1) -} - -###plot figure 3 RIS% as sample size gets larger, using t statistic as the criterion -# if (plt3 == TRUE) { -# meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) -# colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", "RISperc_D") -# for (i in 1:11){ -# meta3$nobs[i] <- size <- 2^(i+5) -# meta3$a[i] <- a_i <- round(size/n_obs*a) -# meta3$b[i] <- b_i <- round(size/n_obs*b) -# meta3$c[i] <- c_i <- round(size/n_obs*c) -# meta3$d[i] <- d_i <- round(size/n_obs*d) -# table_i <- get_abcd_kfnl(a_i, b_i, c_i, d_i) -# thr_i_t <- stats::qt(1 - thr_p/2, size - 1) -# meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, switch_trm, size)$final_extra -# meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 -# } -# fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ -# geom_line(size = 1)+ -# geom_point(size = 2.5)+ -# labs(title = "RIS as % of Sample Size")+ -# scale_x_continuous(name="Sample Size", labels=scales::comma)+ -# scale_y_continuous(name="RIS%") + -# theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), -# axis.text = element_text(size = 12)) -#} - -if (switch_trm == T) { - note <- "A bend in line indicates switches from the control row because the treatment row was exhausted." - } else { - note <- "A bend in line indicates switches from the treatment row because the control row was exhausted." - } - -result <- list(fig1, note, fig2) - -return(result) + if (!switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) + } + + meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, ")", sep = "") + + fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string( + y="pdif", shape = "current",fill = "sigpoint"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ + ggplot2::geom_hline( + yintercept = pos_thr_pdif, + linetype = "dashed", color="green4", size = 1)+ + ggplot2::geom_hline( + yintercept = neg_thr_pdif, + linetype = "dashed", color="red", size = 1)+ + ggplot2::scale_y_continuous( + name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(meta[meta$switch==0,]$RIR, meta[ + meta$sigpoint=="negative",]$RIR, + meta[meta$sigpoint=="positive", + ]$RIR), + labels= c(meta[meta$switch==0,]$xaxis, + meta[meta$sigpoint=="negative", + ]$xaxis, + meta[meta$sigpoint=="positive", + ]$xaxis)) + + ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), + #axis.text= ggplot2::element_text(size = 14), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] + zoom <- zoom[zoom$switch>=0,] + + if (switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=d,] + } + if (switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=c,] + } + if (!switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=a,] + } + if (!switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=b,] + } + + zoom$label <- ifelse(zoom$sigpoint=="positive", + paste("sig pos:RIR=", zoom[zoom$sigpoint=="positive", + ]$RIR),NA) + zoom$label <- ifelse(zoom$sigpoint=="negative", + paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative", + ]$RIR),zoom$label) + zoom$label <- ifelse(zoom$sigpoint=="current", + paste("current"),zoom$label) + + fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill + = "sigpoint"), + size = 1)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggplot2::scale_y_continuous(name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(zoom$RIR[1], zoom$RIR[ + as.integer(length(zoom$RIR)/2)], + zoom$RIR[as.integer( + length(zoom$RIR))]), + labels= c(zoom$xaxis[1], zoom$xaxis[ + as.integer(length(zoom$RIR)/2)], + zoom$xaxis[as.integer( + length(zoom$RIR))])) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype + = "dashed", color="green4", size = 1) + } + + if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype + = "dashed", color="red", size = 1) + } + + ###plot figure 3 RIS% as sample size gets larger, + ### using t statistic as the criterion + # if (plt3 == TRUE) { + # meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) + # colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", + # "RISperc_D") + # for (i in 1:11){ + # meta3$nobs[i] <- size <- 2^(i+5) + # meta3$a[i] <- a_i <- round(size/n_obs*a) + # meta3$b[i] <- b_i <- round(size/n_obs*b) + # meta3$c[i] <- c_i <- round(size/n_obs*c) + # meta3$d[i] <- d_i <- round(size/n_obs*d) + # table_i <- get_abcd_kfnl(a_i, b_i, c_i, d_i) + # thr_i_t <- stats::qt(1 - thr_p/2, size - 1) + # meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, + # switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, + # switch_trm, size)$final_extra + # meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 + # } + # fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ + # geom_line(size = 1)+ + # geom_point(size = 2.5)+ + # labs(title = "RIS as % of Sample Size")+ + # scale_x_continuous(name="Sample Size", labels=scales::comma)+ + # scale_y_continuous(name="RIS%") + + # theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), + # axis.text = element_text(size = 12)) + #} + + if (switch_trm == T) { + note <- "A bend in line indicates switches from the control + row because the treatment row was exhausted." + } else { + note <- "A bend in line indicates switches from the treatment row + because the control row was exhausted." + } + + result <- list(fig1, note, fig2) + + return(result) } From 96e340d429d8bcc3c2b2ffaf3292212a718b13d9 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 14:56:23 -0500 Subject: [PATCH 20/26] Update zzz.R Manual modification --- R/zzz.R | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/R/zzz.R b/R/zzz.R index 32b5dce8..01d8f048 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,8 +1,14 @@ -## quiets concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "replace_null_cases", "percent_bias", "val")) +## quiets concerns (notes) of R CMD check re: the vars that are evaluated +## using non-standard evaluation +if (getRversion() >= "2.15.1") + utils::globalVariables(c("inference", "key", "replace_null_cases", + "percent_bias", "val")) .onAttach <- function(libname, pkgname) { - packageStartupMessage("Sensitivity analysis as described in Frank, Maroulis, Duong, and Kelcey (2013) and in Frank (2000).\nFor more information visit http://konfound-it.com.") + packageStartupMessage("Sensitivity analysis as described in Frank, + Maroulis, Duong, and Kelcey (2013) and + in Frank (2000).\nFor more information visit + http://konfound-it.com.") } #' Open interactive web application for konfound @@ -11,8 +17,10 @@ if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "rep #' @export launch_shiny <- function() { - utils::browseURL("http://konfound-it.com") + utils::browseURL("http://konfound-it.com") } -# addresses concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -# if (getRversion() >= "2.15.1") utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", "x", "y")) +# addresses concerns (notes) of R CMD check re: the vars that are +# evaluated using non-standard evaluation +# if (getRversion() >= "2.15.1") +# utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", "x", "y")) From c6b3a7069e7e4c75dbeed5d71fd8c71e14ad92a7 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:07:34 -0500 Subject: [PATCH 21/26] Update konfound.R line 162, there is a link can't break lines --- R/konfound.R | 226 +++++++++++++++++++++++++++------------------------ 1 file changed, 121 insertions(+), 105 deletions(-) diff --git a/R/konfound.R b/R/konfound.R index e9ea1a84..18fdce62 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -1,12 +1,22 @@ #' Perform sensitivity analysis on fitted models -#' @description For fitted models, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. Currently works for: models created with lm() (linear models). +#' @description For fitted models, this command calculates +#' (1) how much bias there must be in an estimate to invalidate/sustain +#' an inference; (2) the impact of an omitted variable necessary to +#' invalidate/sustain an inference for a regression coefficient. +#' Currently works for: models created with lm() (linear models). #' @param model_object output from a model (currently works for: lm) -#' @param tested_variable Variable associated with the unstandardized beta coefficient to be tested +#' @param tested_variable Variable associated with the unstandardized beta +#' coefficient to be tested #' @inheritParams pkonfound -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param two_by_two whether or not the tested variable is a dichotomous variable in a GLM; if so, the 2X2 table approach is used; only works for single variables at present (so test_all = TRUE will return an error) -#' @param test_all whether to carry out the sensitivity test for all of the coefficients (defaults to FALSE) -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +#' @param index whether output is RIR or IT (impact threshold); +#' defaults to "RIR" +#' @param two_by_two whether or not the tested variable is a dichotomous +#' variable in a GLM; if so, the 2X2 table approach is used; only works for +#' single variables at present (so test_all = TRUE will return an error) +#' @param test_all whether to carry out the sensitivity test for all of the +#' coefficients (defaults to FALSE) +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference #' @importFrom rlang .data #' @examples #' # using lm() for linear models @@ -32,7 +42,8 @@ #' konfound(m3, Days) #' } #' -#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, family = binomial(link = "logit")) +#' m4 <- glm(outcome ~ condition, data = binary_dummy_data, +#' family = binomial(link = "logit")) #' konfound(m4, condition, two_by_two = TRUE, n_treat = 55) #' @@ -49,120 +60,125 @@ konfound <- function(model_object, n_treat = NULL, switch_trm = TRUE, replace = "control") { - - # Stop messages - if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { - stop("konfound() is currently implemented for models estimated with + + # Stop messages + if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { + stop("konfound() is currently implemented for models estimated with lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") - } - - if ("table" %in% to_return & test_all == TRUE){ - stop("cannot return a table when test_all is set to TRUE") - } - # Dealing with non-standard evaluation - - # dealing with non-standard evaluation - #(so unquoted names for tested_variable can be used) - tested_variable_enquo <- rlang::enquo(tested_variable) - tested_variable_string <- rlang::quo_name(tested_variable_enquo) - - # Dispatching based on class - if (class(model_object)[1] == "lm") { - output <- konfound_lm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) + } - if (is.null(output)) { - - } else { - return(output) + if ("table" %in% to_return & test_all == TRUE){ + stop("cannot return a table when test_all is set to TRUE") } - } - - if (inherits(model_object, "glm") & two_by_two == FALSE) { - message("Note that for a non-linear model, + # Dealing with non-standard evaluation + + # dealing with non-standard evaluation + #(so unquoted names for tested_variable can be used) + tested_variable_enquo <- rlang::enquo(tested_variable) + tested_variable_string <- rlang::quo_name(tested_variable_enquo) + + # Dispatching based on class + if (class(model_object)[1] == "lm") { + output <- konfound_lm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + if (is.null(output)) { + + } else { + return(output) + } + } + + if (inherits(model_object, "glm") & two_by_two == FALSE) { + message("Note that for a non-linear model, impact threshold should not be interpreted.") - message("Note that this is only an approximation. For exact results + message("Note that this is only an approximation. For exact results in terms of the number of cases that must be switched from treatment success to treatment failure to invalidate the inference see: - https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") - message("If a dichotomous independent variable is used, consider using + https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") + message("If a dichotomous independent variable is used, consider using the 2X2 table approach enabled with the argument two_by_two = TRUE") - output <- konfound_glm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return - ) + output <- konfound_glm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return + ) + + return(output) + } - return(output) - } - - if (inherits(model_object, "glm") & two_by_two == TRUE) { - - if(is.null(n_treat)){ - stop("Please provide a value for + if (inherits(model_object, "glm") & two_by_two == TRUE) { + + if(is.null(n_treat)){ + stop("Please provide a value for n_treat to use this functionality with a dichotomous predictor") - } - if (test_all == TRUE) { - stop("test_all = TRUE is not supported when two_by_two is specified") + } + if (test_all == TRUE) { + stop("test_all = TRUE is not supported + when two_by_two is specified") + } + output <- konfound_glm_dichotomous( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return, + n_treat = n_treat, + switch_trm = switch_trm, + replace = replace + ) + + return(output) + } - output <- konfound_glm_dichotomous( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return, - n_treat = n_treat, - switch_trm = switch_trm, - replace = replace - ) - - return(output) - } - - if (inherits(model_object, "lmerMod")) { - output <- konfound_lmer( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) - - message("Note that the Kenward-Roger approximation is used to + if (inherits(model_object, "lmerMod")) { + output <- konfound_lmer( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + message("Note that the Kenward-Roger approximation is used to estimate degrees of freedom for the predictor(s) of interest. We are presently working to add other methods for calculating the degrees of freedom for the predictor(s) of interest. If you wish to use other methods now, consider those detailed here: https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. - You can then enter degrees of freedom obtained from another method along with the coefficient, - number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") + You can then enter degrees of freedom obtained from another method + along with the coefficient, + number of observations, and number of covariates to the pkonfound() + function to quantify the robustness of the inference.") + + return(output) + } - return(output) - } - - if (!("table" %in% to_return)) { - message("For more detailed output, consider setting `to_return` to table") - } - - if (test_all == FALSE) { - message("To consider other predictors of interest, + if (!("table" %in% to_return)) { + message("For more detailed output, + consider setting `to_return` to table") + } + + if (test_all == FALSE) { + message("To consider other predictors of interest, consider setting `test_all` to TRUE.") - } else { - message("Note that presently these predictors of interest are tested - independently; future output may use the approach used in mkonfound.") - } + } else { + message("Note that presently these predictors of interest are tested + independently; future output may use the approach used + in mkonfound.") + } } From 4b6cafc60758e390d65c75c88fe6ad002766dd09 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:13:40 -0500 Subject: [PATCH 22/26] Update cop_pse_auxiliary.R "R:222:235 R:248 over 80 characters cause of calculation formula" --- R/cop_pse_auxiliary.R | 109 ++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/R/cop_pse_auxiliary.R b/R/cop_pse_auxiliary.R index cc958b81..8425616f 100644 --- a/R/cop_pse_auxiliary.R +++ b/R/cop_pse_auxiliary.R @@ -21,9 +21,17 @@ cal_rxy <- function(ryxGz, rxz, ryz){ return(rxy) } -cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y, est_uncond, rxz, n_obs){ +cal_delta_star <- function(FR2max, + R2, R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + est_uncond, + rxz, + n_obs){ if (FR2max > .99) {FR2max = .99} - # if (FR2max < R2 + inci) {FR2max = R2 + inci} check with Ken what this means + # if (FR2max < R2 + inci) {FR2max = R2 + inci}check with Ken what this means if (FR2max > R2) {D = sqrt(FR2max - R2)} #elements for computing Oster's delta_star @@ -35,7 +43,8 @@ cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y t_x = var_x * (n_obs / (n_obs - 1)) * (1 - rxz^2) ## adjust df for var_x ## var_x is population variance, need sample variance from x - ## this adjustment is to get closer to what robomit generates as they run regression using the sample data + ## this adjustment is to get closer to what robomit generates as they + ## run regression using the sample data num1 = bt_m_b * rt_m_ro_t_syy * t_x num2 = bt_m_b * var_x * t_x * b0_m_b1^2 num3 = 2 * bt_m_b^2 * (t_x * b0_m_b1 * var_x) @@ -115,8 +124,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -130,18 +139,24 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[4,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } - + #get regression based on true delta in terms of standardized coefficent cor.matrix <- matrix(c(1,rxy, rzy, rcvy, rxy, 1, rxz, rcvx, @@ -153,8 +168,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cor <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cor = F @@ -169,15 +184,21 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, # if model can be run, then run it if (class(flag_cor) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) std_R2 <- 1 - lavaan::parameterEstimates(fit)[4,]$est - std_betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - std_seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - std_betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - std_seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - std_betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - std_seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + std_betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + std_seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + std_betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + std_seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + std_betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + std_seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } if (class(flag_cor) == "lavaan" && class(flag_cov) == "lavaan") { @@ -247,8 +268,8 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -262,16 +283,20 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[3,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - } - + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + } + if (class(flag_cov) == "lavaan") { result = list(R2, betaX, seX, betaZ, seZ) return(result) @@ -294,8 +319,8 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -309,12 +334,14 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[2,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se } if (class(flag_cov) == "lavaan") { @@ -324,5 +351,3 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ stop("Error!") } } - - From 49b663463d72501ca872e6fe35ca24c8f091b31d Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:14:43 -0500 Subject: [PATCH 23/26] Update concord1.R Manual modification: no more than 80 characters per line of code --- R/concord1.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/concord1.R b/R/concord1.R index a406e2f1..9346f9a0 100644 --- a/R/concord1.R +++ b/R/concord1.R @@ -4,6 +4,7 @@ #' #' @docType data #' @name concord1 -#' @references Hamilton, Lawrence C. 1983. Saving water: A causal model of household conservation. Sociological Perspectives 26(4):355-374. +#' @references Hamilton, Lawrence C. 1983. Saving water: +#' A causal model of household conservation. Sociological Perspectives 26(4):355-374. #' @format A data.frame with 496 rows and 10 variables. -NULL \ No newline at end of file +NULL From 39f3418a22e86fe2412f1d31505edd3f4e467176 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:16:54 -0500 Subject: [PATCH 24/26] Update mkonfound-data.R Manual modification: no more than 80 characters per line of code --- R/mkonfound-data.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/mkonfound-data.R b/R/mkonfound-data.R index db063821..bf16f4f2 100644 --- a/R/mkonfound-data.R +++ b/R/mkonfound-data.R @@ -1,7 +1,9 @@ #' Example data for the mkonfound function #' -#' A dataset containing t and df values from example studies from Educational Evaluation -#' and Policy Analysis (as detailed in Frank et al., 2013): https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view +#' A dataset containing t and df values from example studies +#' from Educational Evaluation +#' and Policy Analysis (as detailed in Frank et al., 2013): +#' https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view #' #' @format A data frame with 30 rows and 2 variables: #' \describe{ @@ -9,5 +11,6 @@ #' \item{df}{degrees of freedom associated with the t value} #' ... #' } -#' @source \url{https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} -"mkonfound_ex" \ No newline at end of file +#' @source \url +#' {https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} +"mkonfound_ex" From 5aedde0cc225cc8deb5b7d31044f33b8b0db3a85 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:59:53 -0500 Subject: [PATCH 25/26] Update tkonfound_fig.R --- R/tkonfound_fig.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index a91a11aa..0add9b3d 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -199,7 +199,7 @@ tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, zoom_upper <- -(negsig - 4) meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") } else { - # from sig to not sig by increasing odds ratio that is smaller than 1 + # from sig to not sig by increasing odds ratio that is smaller than 1 neginsig <- meta[meta$sig==1,]$switch neg_thr <- (meta[meta$switch==neginsig,]$logodds+ meta[meta$switch==(neginsig-1),]$logodds)/2 From 2c428400c8d0f977c65002654f6f78cbfcce9424 Mon Sep 17 00:00:00 2001 From: wwang93 <142034236+wwang93@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:00:39 -0500 Subject: [PATCH 26/26] Update tkonfound_fig.R --- R/tkonfound_fig.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 0add9b3d..4f7e5f65 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -199,7 +199,8 @@ tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, zoom_upper <- -(negsig - 4) meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") } else { - # from sig to not sig by increasing odds ratio that is smaller than 1 + # from sig to not sig by increasing odds ratio + # that is smaller than 1 neginsig <- meta[meta$sig==1,]$switch neg_thr <- (meta[meta$switch==neginsig,]$logodds+ meta[meta$switch==(neginsig-1),]$logodds)/2