From ff23efacaf01b8e3609ed94794090254f6e1a83a Mon Sep 17 00:00:00 2001 From: maxgriswold Date: Wed, 15 Jan 2025 16:01:01 -0800 Subject: [PATCH 1/4] Fixing noconf CSA models (#27) Updated no confounding code so that CSA models work throughout OPTIC. Fixed errors in autoreg model formulas. Updated apply_treatment_effect code, then modified concurrent method to work with new implementation. Updated code so that additional new model types will work with noconfounding (however, still need to update code for standard errors using these methods). This does not fix issues using CSA when model is concurrent! --- R/apply-treatment-effect.R | 12 +-- R/concurrent-methods.R | 3 +- R/no-confounding-methods.R | 159 ++++++++++++++++++++++++++++++------- R/optic-model-class.R | 2 +- man/optic_model.Rd | 2 +- vignettes/intro_optic.Rmd | 4 +- 6 files changed, 141 insertions(+), 41 deletions(-) diff --git a/R/apply-treatment-effect.R b/R/apply-treatment-effect.R index 09c9531..9af50c4 100644 --- a/R/apply-treatment-effect.R +++ b/R/apply-treatment-effect.R @@ -16,12 +16,10 @@ #' @param concurrent bool for whether this is concurrent run or not #' #' @noRd -apply_treatment_effect <- function(x, model_formula, model_call, te, effect_direction, concurrent) { - # identify outcome - outcome <- model_terms(model_formula)[["lhs"]] +apply_treatment_effect <- function(x, model_formula, outcome, model_call, te, effect_direction, concurrent){ # identify additive or multiplicative modification of outcome required - if (model_call == "lm" | model_call == "feols" | model_call == "multisynth" | model_call == 'lmer') { + if (model_call != "glm.nb") { modifier <- "additive" } else if (model_call == "glm.nb") { modifier <- "multiplicative" @@ -46,9 +44,11 @@ apply_treatment_effect <- function(x, model_formula, model_call, te, effect_dire } else { x[[outcome]] <- x[[outcome]] + (x[[outcome]] * te * x[["treatment"]]) } - x[[outcome]] <- round2(x[[outcome]], 0) + + #l + # x[[outcome]] <- round2(x[[outcome]], 0) } return(x) } -} \ No newline at end of file +} diff --git a/R/concurrent-methods.R b/R/concurrent-methods.R index 24341c2..dfdf2fb 100644 --- a/R/concurrent-methods.R +++ b/R/concurrent-methods.R @@ -183,7 +183,8 @@ concurrent_premodel <- function(model_simulation) { model_call=model$model_call, te=c(model_simulation$effect_magnitude1, model_simulation$effect_magnitude2), effect_direction=model_simulation$effect_direction, - concurrent=TRUE + concurrent=TRUE, + outcome = outcome ) # if autoregressive, need to add lag for crude rate diff --git a/R/no-confounding-methods.R b/R/no-confounding-methods.R index 5f0faa0..592f783 100644 --- a/R/no-confounding-methods.R +++ b/R/no-confounding-methods.R @@ -45,11 +45,12 @@ noconf_sample <- function(single_simulation) { model_type = names(single_simulation$models) # If there is at least one did model - if(!any(model_type == "drdid")){ + if(!any(model_type == "did")){ outcomes <- unique(sapply(single_simulation$models, function(x) { optic::model_terms(x[["model_formula"]])[["lhs"]] })) }else{ - outcomes <- as.character(single_simulation$models$drdid$model_args$yname) + outcomes <- as.character(single_simulation$models$did$model_args$yname) } + ############################################### ### IDENTIFY TREATED UNITS AND TIME PERIODS ### ############################################### @@ -206,8 +207,15 @@ noconf_premodel <- function(model_simulation) { x <- model_simulation$data model <- model_simulation$models - outcome <- optic::model_terms(model$model_formula)[["lhs"]] - oo <- dplyr::sym(outcome) + + if (model$type != "did") { + outcome <- optic::model_terms(model$model_formula)[["lhs"]] + oo <- dplyr::sym(outcome) + } else if (model$type=='did') { + outcome <- as.character(model_simulation$models$model_args$yname) + oo <- dplyr::sym(outcome) + } + model_type <- model$type balance_statistics <- NULL @@ -221,38 +229,34 @@ noconf_premodel <- function(model_simulation) { model_call=model$model_call, te=c(model_simulation$effect_magnitude), effect_direction=model_simulation$effect_direction, - concurrent=FALSE + concurrent=FALSE, + outcome = outcome ) + + unit_sym <- dplyr::sym(model_simulation$unit_var) + time_sym <- dplyr::sym(model_simulation$time_var) # PNL note # this implementation does not seem to use the lagged crude rate # if autoregressive, need to add lag for crude rate # when outcome is deaths, derive new crude rate from modified outcome + # if autoregressive, need to add lagged outcome if (model_type == "autoreg") { - # get lag of crude rate and add it to the model - unit_sym <- dplyr::sym(model_simulation$unit_var) - time_sym <- dplyr::sym(model_simulation$time_var) - x <- x %>% dplyr::arrange(!!unit_sym, !!time_sym) %>% dplyr::group_by(!!unit_sym) %>% dplyr::mutate(lag_outcome = dplyr::lag(!!oo, n=1L)) %>% dplyr::ungroup() - if(model$model_call == "feols"){ formula_components <- as.character(model_simulation$models$model_formula) updated_3 <- strsplit(formula_components[3], " | ", fixed=TRUE) - if(length(updated_3[[1]]==1)){ - new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome")) - }else{ - new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome |", updated_3[[1]][2])) - } + + new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][[1]], "+ lag_outcome")) + model_simulation$models$model_formula <- new_fmla - }else{ - model_simulation$models$model_formula <- update.formula(model_simulation$models$model_formula, ~ . + lag_outcome) - } + } else if (model_type == "multisynth") { x$treatment[x$treatment > 0] <- 1 x$treatment_level[x$treatment_level > 0] <- 1 @@ -261,9 +265,11 @@ noconf_premodel <- function(model_simulation) { if (sum(is.na(x[[outcome]])) > 0) { stop("multisynth method cannot handle missingness in outcome.") } - } else if (model_type == "drdid") { + } else if (model_type == "did") { + x$treatment[x$treatment > 0] <- 1 x$treatment_level[x$treatment_level > 0] <- 1 + } # get balance information @@ -305,6 +311,53 @@ noconf_premodel <- function(model_simulation) { mu0 = mean((!!oo)[treatment == 0], na.rm=T), sd = sd(!!oo, na.rm=T)) bal_stats = bind_cols(bal_stats, bal_stats2) + + # Add on first year for treatment, which we will use to define + # treatment status across models. + + x <- x %>% + group_by(!!unit_sym) %>% + mutate(treatment_date = ifelse(max(trt_ind == 1), max(year ^ (1-treatment)), 0)) %>% + ungroup() + + # Depending on the model, we may need to recode treatment year or + # treatment date: + + if (model_type == "reg"|model_type == "autoreg"){ + + # Sun and Abraham models expect treatment year to be Inf, if untreated: + x <- x %>% + mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date)) + + }else if (model_type == "did"){ + + # CSA models expect treatment year to be Inf, if untreated and expects + # location to be a numeric variable + x <- x %>% + mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date), + !!unit_sym := as.numeric(as.factor(!!unit_sym))) + + }else if (model_type == "multisynth"){ + + # Multisynth expects treatment to be either a 0/1 (sampling process uses this + # variable instrumentally to produce draws of treatment effect. So reset to + # binary) + + x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0)) + + }else if (model_type == "did_imputation"){ + + # I added this section for completeness, in case we want to modify later. + #Borusyak, Jaravel, and Spiess expects treatment_date to be zero for + # untreated units. So no need to change data coding. + + }else if (model_type == "did2s"){ + + # Similar to multisynth, we need treatment to again be binary for + # the first stage + x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0)) + + } model_simulation$balance_statistics <- bal_stats model_simulation$data <- x @@ -329,14 +382,30 @@ noconf_premodel <- function(model_simulation) { #' @noRd noconf_model <- function(model_simulation) { model <- model_simulation$models + model_type <- model_simulation$models$type + addtl_args <- model$model_args + x <- model_simulation$data - if (model_simulation$models$name == "drdid") { - args = c(list(data=x), model[["model_args"]]) - } else if(model$model_call == "feols"){ - args = c(list(data=x, fml=model[["model_formula"]]), model[["model_args"]], notes=FALSE) - }else { - args = c(list(data=x, formula=model[["model_formula"]]), model[["model_args"]]) + # The only consistent argument across these + # models is "data". We need to also pass along user provided arguments + # and set some arguments based off the specific model type + + args <- list(data=x) + + if (length(addtl_args) >= 1){ + args <- append(args, addtl_args) + } + + # Change names of provided arguments to meet the needs of respective packages + if (model_type == "reg"|model_type == "autoreg"){ + args[['formula']] <- model$model_formula + }else if (model_type == "multisynth"){ + args[['form']] <- model$model_formula + }else if (model_type == "did_imputation"){ + args[['horizon']] <- T + }else if (model_type == "did2s"){ + args[['treatment']] <- 'treatment' } m <- do.call( @@ -361,7 +430,12 @@ noconf_model <- function(model_simulation) { #' @importFrom stats resid #' @noRd noconf_postmodel <- function(model_simulation) { - outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]] + + if (model_simulation$models[["type"]] == "did"){ + outcome <- as.character(model_simulation$models$model_args$yname) + }else{ + outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]] + } # get run metadata to merge in after meta_data <- data.frame( model_name = model_simulation$models$name, @@ -375,10 +449,11 @@ noconf_postmodel <- function(model_simulation) { n_units=model_simulation$n_units, effect_direction=model_simulation$effect_direction ) + + m <- model_simulation$model_result # get model result information and apply standard error adjustments - if (model_simulation$models[["type"]] != "multisynth") { - m <- model_simulation$model_result + if (model_simulation$models[["type"]] == "reg"|model_simulation$models[["type"]] == "autoreg"|model_simulation$models[["type"]] == "did2s") { if(model_simulation$models$model_call=="feols"){ cf <- as.data.frame(summary(m)$coeftable) } else{ @@ -413,7 +488,31 @@ noconf_postmodel <- function(model_simulation) { mse=mean(resid(m)^2, na.rm=T), stringsAsFactors=FALSE ) - } else { + } else if (model_simulation$models[["type"]] == "did"){ + m_agg <- did::aggte(m, type='group', na.rm=T) + + cf <- data.frame("estimate" = m_agg$overall.att, + "se" = m_agg$overall.se) + + estimate <- cf$estimate + se <- cf$se + variance <- se ^ 2 + t_stat <- NA + p_value <- 2 * pnorm(abs(estimate/se), lower.tail = FALSE) + + results <- data.frame( + outcome=outcome, + se_adjustment="none", + estimate=estimate, + se=se, + variance=variance, + t_stat=NA, + p_value=p_value, + mse = NA, + stringsAsFactors=FALSE + ) + } + else{ m <- model_simulation$model_result cf <- summary(m) cf <- cf$att @@ -588,9 +687,9 @@ noconf_postmodel <- function(model_simulation) { } results <- left_join(results, meta_data, by="outcome") - results <- left_join(results, model_simulation$balance_statistics, by="outcome") return(results) + } diff --git a/R/optic-model-class.R b/R/optic-model-class.R index 7d9615d..4245991 100644 --- a/R/optic-model-class.R +++ b/R/optic-model-class.R @@ -67,7 +67,7 @@ optic_model <- function(name, type,call, formula, se_adjust, ...) { # Constructor: new_optic_model <- function( name, - type= c("reg", "autoreg", "multisynth", "drdid"), + type= c("reg", "autoreg", "multisynth", "did"), call= c("lm", "feols", "multisynth", "glm.nb"), formula, args=list(weights=as.name('population')), diff --git a/man/optic_model.Rd b/man/optic_model.Rd index e5a4ad6..606ed50 100644 --- a/man/optic_model.Rd +++ b/man/optic_model.Rd @@ -9,7 +9,7 @@ optic_model(name, type, call, formula, se_adjust, ...) \arguments{ \item{name}{Name of the model object, used to identify the model when reviewing simulation results} -\item{type}{Estimator used to identify the treatment effect using simulated data. Specified as a string, which can either be 'reg' (regression), 'autoreg' (autoregression, which adds a lag for the outcome variable to a regression model), 'drdid' (doubly-robust difference-in-difference estimator), or 'multisynth' (augmented synthetic control)} +\item{type}{Estimator used to identify the treatment effect using simulated data. Specified as a string, which can either be 'reg' (regression), 'autoreg' (autoregression, which adds a lag for the outcome variable to a regression model), 'did' (doubly-robust difference-in-difference estimator), or 'multisynth' (augmented synthetic control)} \item{call}{String which specifies the R function to call for applying the estimator. Package currently supports either 'lm' (linear model), 'feols' (fixed-effect OLS), 'multisynth' (pooled synthetic controls), or 'glm.nb' (negative-binomial generalized nearlized linear model)} diff --git a/vignettes/intro_optic.Rmd b/vignettes/intro_optic.Rmd index 20995d8..3d3cef2 100644 --- a/vignettes/intro_optic.Rmd +++ b/vignettes/intro_optic.Rmd @@ -141,7 +141,7 @@ then attempt to estimate this effect based on user-provided models. The - `name`: A name for the model to identify the model type in results. -- `type`: Users can set this as either "autoreg", "drdid", +- `type`: Users can set this as either "autoreg", "did", "multisynth", or "reg". - "reg" uses a typical regression framework, implementing the @@ -149,7 +149,7 @@ then attempt to estimate this effect based on user-provided models. The - "autoreg" adds a dependent lag to the model formula. - - "drdid" estimates a treatment effect using a doubly-robust + - "did" estimates a treatment effect using a doubly-robust difference-in-difference estimator, with covariates in the `model_formula` argument used within both the propensity score stage and outcome modeling stage (for more details on doubly robust From 479ed2e28cf8dcc66393c55f764e7f1994dcb364 Mon Sep 17 00:00:00 2001 From: maxgriswold Date: Wed, 15 Jan 2025 16:11:13 -0800 Subject: [PATCH 2/4] Incrementing minor version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f9242aa..95d4dcd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: optic Title: Simulation Tool for Causal Inference Using Longitudinal Data -Version: 1.0.1 +Version: 1.1.1 Authors@R: c( person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"), comment = c(ORCID = "0000-0002-2391-4601")), From 25a85f7f44a37ef8e58dcf7aa2db8033cc9de96b Mon Sep 17 00:00:00 2001 From: maxgriswold Date: Wed, 15 Jan 2025 16:28:08 -0800 Subject: [PATCH 3/4] Updating references Found issue in json file - there was a missing comma. --- vignettes/optic_refs.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vignettes/optic_refs.json b/vignettes/optic_refs.json index 296123b..b8b81dc 100644 --- a/vignettes/optic_refs.json +++ b/vignettes/optic_refs.json @@ -139,6 +139,20 @@ ] } }, + { + "id":"griffin2023identifying", + "author":[{"family":"Griffin","given":"Beth Ann"},{"family":"Schuler","given":"Megan S"},{"family":"Stone","given":"Elizabeth M"},{"family":"Patrick","given":"Stephen W"},{"family":"Stein","given":"Bradley D"},{"family":"De Lima","given":"Pedro Nascimento"},{"family":"Griswold","given":"Max"},{"family":"Scherling","given":"Adam"},{"family":"Stuart","given":"Elizabeth A"}], + "citation-key":"griffin2023identifying", + "container-title":"Epidemiology (Cambridge, Mass.)", + "container-title-short":"Epidemiology", + "issue":"6", + "issued":{"date-parts":[["2023"]]}, + "page":"856–864", + "publisher":"LWW", + "title":"Identifying optimal methods for addressing confounding bias when estimating the effects of state-level policies", + "type":"article-journal", + "URL":"https://journals.lww.com/epidem/abstract/2023/11000/identifying_optimal_methods_for_addressing.14.aspx","volume":"34" + }, { "id": "ben-michaelAugmentedSyntheticControl2021", "type": "article-journal", From c013e3b55d8b7eace36625c9b85824c98bf9a1bc Mon Sep 17 00:00:00 2001 From: maxgriswold Date: Thu, 16 Jan 2025 09:43:20 -0800 Subject: [PATCH 4/4] Mod version number Changing to patch iteration rather than minor versioning. --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 95d4dcd..86f247a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: optic Title: Simulation Tool for Causal Inference Using Longitudinal Data -Version: 1.1.1 +Version: 1.0.2 Authors@R: c( person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"), comment = c(ORCID = "0000-0002-2391-4601")),