Merge pull request #28 from RANDCorporation/fix_issue_27

Fix issue #27
RANDCorporation · Jan 16, 2025 · 140cdf5 · 140cdf5
2 parents 43acc6a + c013e3b
commit 140cdf5
Showing 8 changed files with 156 additions and 42 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: optic
 Title: Simulation Tool for Causal Inference Using Longitudinal Data
-Version: 1.0.1
+Version: 1.0.2
 Authors@R: c(
     person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"),
            comment = c(ORCID = "0000-0002-2391-4601")),

diff --git a/R/apply-treatment-effect.R b/R/apply-treatment-effect.R
@@ -16,12 +16,10 @@
 #' @param concurrent bool for whether this is concurrent run or not
 #' 
 #' @noRd
-apply_treatment_effect <- function(x, model_formula, model_call, te, effect_direction, concurrent) {
-  # identify outcome
-  outcome <- model_terms(model_formula)[["lhs"]]
+apply_treatment_effect <- function(x, model_formula, outcome, model_call, te, effect_direction, concurrent){
 
   # identify additive or multiplicative modification of outcome required
-  if (model_call == "lm" | model_call == "feols" | model_call == "multisynth" | model_call == 'lmer') {
+  if (model_call != "glm.nb") {
     modifier <- "additive"
   } else if (model_call == "glm.nb") {
     modifier <- "multiplicative"
@@ -46,9 +44,11 @@ apply_treatment_effect <- function(x, model_formula, model_call, te, effect_dire
       } else {
         x[[outcome]] <- x[[outcome]] + (x[[outcome]] * te * x[["treatment"]])
       }
-      x[[outcome]] <- round2(x[[outcome]], 0)
+
+      #l
+      # x[[outcome]] <- round2(x[[outcome]], 0)
     } 
 
     return(x)
   }
-}
+}
diff --git a/R/concurrent-methods.R b/R/concurrent-methods.R
@@ -183,7 +183,8 @@ concurrent_premodel <- function(model_simulation) {
     model_call=model$model_call,
     te=c(model_simulation$effect_magnitude1, model_simulation$effect_magnitude2),
     effect_direction=model_simulation$effect_direction,
-    concurrent=TRUE
+    concurrent=TRUE,
+    outcome = outcome
   )
 
   # if autoregressive, need to add lag for crude rate

diff --git a/R/no-confounding-methods.R b/R/no-confounding-methods.R
@@ -45,11 +45,12 @@ noconf_sample <- function(single_simulation) {
   model_type = names(single_simulation$models)
 
   # If there is at least one did model
-  if(!any(model_type == "drdid")){
+  if(!any(model_type == "did")){
     outcomes <- unique(sapply(single_simulation$models, function(x) { optic::model_terms(x[["model_formula"]])[["lhs"]] }))
   }else{
-    outcomes <- as.character(single_simulation$models$drdid$model_args$yname)
+    outcomes <- as.character(single_simulation$models$did$model_args$yname)
   }
+
   ###############################################
   ### IDENTIFY TREATED UNITS AND TIME PERIODS ###
   ###############################################
@@ -206,8 +207,15 @@ noconf_premodel <- function(model_simulation) {
 
   x <- model_simulation$data
   model <- model_simulation$models
-  outcome <- optic::model_terms(model$model_formula)[["lhs"]]
-  oo <- dplyr::sym(outcome)
+
+  if (model$type != "did") {
+    outcome <- optic::model_terms(model$model_formula)[["lhs"]]
+    oo <- dplyr::sym(outcome)
+  } else if (model$type=='did') {
+    outcome <- as.character(model_simulation$models$model_args$yname)
+    oo <- dplyr::sym(outcome)
+  }
+
   model_type <- model$type
   balance_statistics <- NULL
 
@@ -221,38 +229,34 @@ noconf_premodel <- function(model_simulation) {
     model_call=model$model_call,
     te=c(model_simulation$effect_magnitude),
     effect_direction=model_simulation$effect_direction,
-    concurrent=FALSE
+    concurrent=FALSE,
+    outcome = outcome
   )
+
+  unit_sym <- dplyr::sym(model_simulation$unit_var)
+  time_sym <- dplyr::sym(model_simulation$time_var)
 
   # PNL note
   # this implementation does not seem to use the lagged crude rate
 
   # if autoregressive, need to add lag for crude rate
   # when outcome is deaths, derive new crude rate from modified outcome
+  # if autoregressive, need to add lagged outcome
   if (model_type == "autoreg") {
 
-    # get lag of crude rate and add it to the model
-    unit_sym <- dplyr::sym(model_simulation$unit_var)
-    time_sym <- dplyr::sym(model_simulation$time_var)
-
     x <- x %>%
       dplyr::arrange(!!unit_sym, !!time_sym) %>%
       dplyr::group_by(!!unit_sym) %>%
       dplyr::mutate(lag_outcome = dplyr::lag(!!oo, n=1L)) %>%
       dplyr::ungroup()
 
-    if(model$model_call == "feols"){
       formula_components <- as.character(model_simulation$models$model_formula)
       updated_3 <- strsplit(formula_components[3], " | ", fixed=TRUE)
-      if(length(updated_3[[1]]==1)){
-        new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome"))
-      }else{
-        new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome |", updated_3[[1]][2]))
-      }
+
+      new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][[1]], "+ lag_outcome"))
+
       model_simulation$models$model_formula <- new_fmla
-    }else{
-      model_simulation$models$model_formula <- update.formula(model_simulation$models$model_formula, ~ . + lag_outcome)
-    }
+
   } else if (model_type == "multisynth") {
     x$treatment[x$treatment > 0] <- 1
     x$treatment_level[x$treatment_level > 0] <- 1
@@ -261,9 +265,11 @@ noconf_premodel <- function(model_simulation) {
     if (sum(is.na(x[[outcome]])) > 0) {
       stop("multisynth method cannot handle missingness in outcome.")
     }
-  } else if (model_type == "drdid") {
+  } else if (model_type == "did") {
+
     x$treatment[x$treatment > 0] <- 1
     x$treatment_level[x$treatment_level > 0] <- 1
+
   }
 
   # get balance information
@@ -305,6 +311,53 @@ noconf_premodel <- function(model_simulation) {
               mu0 = mean((!!oo)[treatment == 0], na.rm=T),
               sd = sd(!!oo, na.rm=T))
   bal_stats = bind_cols(bal_stats, bal_stats2)
+
+  # Add on first year for treatment, which we will use to define
+  # treatment status across models.
+
+  x <- x %>% 
+    group_by(!!unit_sym) %>%
+    mutate(treatment_date = ifelse(max(trt_ind == 1), max(year ^ (1-treatment)), 0)) %>% 
+    ungroup()
+
+  # Depending on the model, we may need to recode treatment year or
+  # treatment date:
+
+  if (model_type == "reg"|model_type == "autoreg"){
+
+    # Sun and Abraham models expect treatment year to be Inf, if untreated:
+    x <- x %>% 
+      mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date))
+
+  }else if (model_type == "did"){
+
+    # CSA models expect treatment year to be Inf, if untreated and expects
+    # location to be a numeric variable
+    x <- x %>% 
+      mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date),
+             !!unit_sym := as.numeric(as.factor(!!unit_sym)))
+
+  }else if (model_type == "multisynth"){
+
+    # Multisynth expects treatment to be either a 0/1 (sampling process uses this
+    # variable instrumentally to produce draws of treatment effect. So reset to 
+    # binary)
+
+    x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))
+
+  }else if (model_type == "did_imputation"){
+
+    # I added this section for completeness, in case we want to modify later.
+    #Borusyak, Jaravel, and Spiess expects treatment_date to be zero for
+    # untreated units. So no need to change data coding.
+
+  }else if (model_type == "did2s"){
+
+    # Similar to multisynth, we need treatment to again be binary for
+    # the first stage 
+    x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))
+
+  }
 
   model_simulation$balance_statistics <- bal_stats
   model_simulation$data <- x
@@ -329,14 +382,30 @@ noconf_premodel <- function(model_simulation) {
 #' @noRd
 noconf_model <- function(model_simulation) {
   model <- model_simulation$models
+  model_type <- model_simulation$models$type
+  addtl_args <- model$model_args
+
   x <- model_simulation$data
 
-  if (model_simulation$models$name == "drdid") {
-    args = c(list(data=x), model[["model_args"]])
-  } else if(model$model_call == "feols"){
-    args = c(list(data=x, fml=model[["model_formula"]]), model[["model_args"]], notes=FALSE)
-  }else {
-    args = c(list(data=x, formula=model[["model_formula"]]), model[["model_args"]])
+  # The only consistent argument across these
+  # models is "data". We need to also pass along user provided arguments
+  # and set some arguments based off the specific model type 
+
+  args <- list(data=x)
+
+  if (length(addtl_args) >= 1){
+    args <- append(args, addtl_args)
+  }
+
+  # Change names of provided arguments to meet the needs of respective packages
+  if (model_type == "reg"|model_type == "autoreg"){
+    args[['formula']] <- model$model_formula
+  }else if (model_type == "multisynth"){
+    args[['form']] <-  model$model_formula
+  }else if (model_type == "did_imputation"){
+    args[['horizon']] <- T
+  }else if (model_type == "did2s"){
+    args[['treatment']] <- 'treatment'
   }
 
   m <- do.call(
@@ -361,7 +430,12 @@ noconf_model <- function(model_simulation) {
 #' @importFrom stats resid
 #' @noRd
 noconf_postmodel <- function(model_simulation) {
-  outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]
+
+  if (model_simulation$models[["type"]] == "did"){
+    outcome <- as.character(model_simulation$models$model_args$yname)
+  }else{
+    outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]
+  }
   # get run metadata to merge in after
   meta_data <- data.frame(
     model_name = model_simulation$models$name,
@@ -375,10 +449,11 @@ noconf_postmodel <- function(model_simulation) {
     n_units=model_simulation$n_units,
     effect_direction=model_simulation$effect_direction  
   )
+
+  m <- model_simulation$model_result
 
   # get model result information and apply standard error adjustments
-  if (model_simulation$models[["type"]] != "multisynth") {
-    m <- model_simulation$model_result
+  if (model_simulation$models[["type"]] == "reg"|model_simulation$models[["type"]] == "autoreg"|model_simulation$models[["type"]] == "did2s") {
     if(model_simulation$models$model_call=="feols"){
       cf <- as.data.frame(summary(m)$coeftable)
     } else{
@@ -413,7 +488,31 @@ noconf_postmodel <- function(model_simulation) {
       mse=mean(resid(m)^2, na.rm=T),
       stringsAsFactors=FALSE
     )
-  } else {
+  } else if (model_simulation$models[["type"]] == "did"){
+    m_agg <- did::aggte(m, type='group', na.rm=T)
+
+    cf <- data.frame("estimate" = m_agg$overall.att,
+                     "se" = m_agg$overall.se)
+
+    estimate <- cf$estimate
+    se <- cf$se
+    variance <- se ^ 2
+    t_stat <- NA
+    p_value <- 2 * pnorm(abs(estimate/se), lower.tail = FALSE)
+
+    results <- data.frame(
+        outcome=outcome,
+        se_adjustment="none",
+        estimate=estimate,
+        se=se,
+        variance=variance,
+        t_stat=NA,
+        p_value=p_value,
+        mse = NA,
+        stringsAsFactors=FALSE
+      )
+  }
+  else{
     m <- model_simulation$model_result
     cf <- summary(m)
     cf <- cf$att
@@ -588,9 +687,9 @@ noconf_postmodel <- function(model_simulation) {
   }
 
   results <- left_join(results, meta_data, by="outcome")
-  results <- left_join(results, model_simulation$balance_statistics, by="outcome")
 
   return(results)
+
 }
 
 

diff --git a/R/optic-model-class.R b/R/optic-model-class.R
@@ -67,7 +67,7 @@ optic_model <- function(name, type,call, formula, se_adjust, ...) {
 # Constructor:
 new_optic_model <- function(
                   name,
-                  type= c("reg", "autoreg", "multisynth", "drdid"),
+                  type= c("reg", "autoreg", "multisynth", "did"),
                   call= c("lm", "feols", "multisynth", "glm.nb"),
                   formula,
                   args=list(weights=as.name('population')),

diff --git a/man/optic_model.Rd b/man/optic_model.Rd
diff --git a/vignettes/intro_optic.Rmd b/vignettes/intro_optic.Rmd
@@ -141,15 +141,15 @@ then attempt to estimate this effect based on user-provided models. The
 
 -   `name`: A name for the model to identify the model type in results.
 
--   `type`: Users can set this as either "autoreg", "drdid",
+-   `type`: Users can set this as either "autoreg", "did",
     "multisynth", or "reg".
 
     -   "reg" uses a typical regression framework, implementing the
         procedure chosen in `model_call`.
 
     -   "autoreg" adds a dependent lag to the model formula.
 
-    -   "drdid" estimates a treatment effect using a doubly-robust
+    -   "did" estimates a treatment effect using a doubly-robust
         difference-in-difference estimator, with covariates in the
         `model_formula` argument used within both the propensity score
         stage and  outcome modeling stage (for more details on doubly robust

diff --git a/vignettes/optic_refs.json b/vignettes/optic_refs.json
@@ -139,6 +139,20 @@
 			]
 		}
 	},
+	{
+		"id":"griffin2023identifying",
+		"author":[{"family":"Griffin","given":"Beth Ann"},{"family":"Schuler","given":"Megan S"},{"family":"Stone","given":"Elizabeth M"},{"family":"Patrick","given":"Stephen W"},{"family":"Stein","given":"Bradley D"},{"family":"De Lima","given":"Pedro Nascimento"},{"family":"Griswold","given":"Max"},{"family":"Scherling","given":"Adam"},{"family":"Stuart","given":"Elizabeth A"}],
+		"citation-key":"griffin2023identifying",
+		"container-title":"Epidemiology (Cambridge, Mass.)",
+		"container-title-short":"Epidemiology",
+		"issue":"6",
+		"issued":{"date-parts":[["2023"]]},
+		"page":"856–864",
+		"publisher":"LWW",
+		"title":"Identifying optimal methods for addressing confounding bias when estimating the effects of state-level policies",
+		"type":"article-journal",
+		"URL":"https://journals.lww.com/epidem/abstract/2023/11000/identifying_optimal_methods_for_addressing.14.aspx","volume":"34"
+	},
 	{
 		"id": "ben-michaelAugmentedSyntheticControl2021",
 		"type": "article-journal",