From ff23efacaf01b8e3609ed94794090254f6e1a83a Mon Sep 17 00:00:00 2001
From: maxgriswold <max.griswold.43@gmail.com>
Date: Wed, 15 Jan 2025 16:01:01 -0800
Subject: [PATCH 1/4] Fixing noconf CSA models (#27)

Updated no confounding code so that CSA models work throughout OPTIC. Fixed errors in autoreg model formulas. Updated apply_treatment_effect code, then modified concurrent method to work with new implementation. Updated code so that additional new model types will work with noconfounding (however, still need to update code for standard errors using these methods).

This does not fix issues using CSA when model is concurrent!
---
 R/apply-treatment-effect.R |  12 +--
 R/concurrent-methods.R     |   3 +-
 R/no-confounding-methods.R | 159 ++++++++++++++++++++++++++++++-------
 R/optic-model-class.R      |   2 +-
 man/optic_model.Rd         |   2 +-
 vignettes/intro_optic.Rmd  |   4 +-
 6 files changed, 141 insertions(+), 41 deletions(-)

diff --git a/R/apply-treatment-effect.R b/R/apply-treatment-effect.R
index 09c9531..9af50c4 100644
--- a/R/apply-treatment-effect.R
+++ b/R/apply-treatment-effect.R
@@ -16,12 +16,10 @@
 #' @param concurrent bool for whether this is concurrent run or not
 #' 
 #' @noRd
-apply_treatment_effect <- function(x, model_formula, model_call, te, effect_direction, concurrent) {
-  # identify outcome
-  outcome <- model_terms(model_formula)[["lhs"]]
+apply_treatment_effect <- function(x, model_formula, outcome, model_call, te, effect_direction, concurrent){
   
   # identify additive or multiplicative modification of outcome required
-  if (model_call == "lm" | model_call == "feols" | model_call == "multisynth" | model_call == 'lmer') {
+  if (model_call != "glm.nb") {
     modifier <- "additive"
   } else if (model_call == "glm.nb") {
     modifier <- "multiplicative"
@@ -46,9 +44,11 @@ apply_treatment_effect <- function(x, model_formula, model_call, te, effect_dire
       } else {
         x[[outcome]] <- x[[outcome]] + (x[[outcome]] * te * x[["treatment"]])
       }
-      x[[outcome]] <- round2(x[[outcome]], 0)
+      
+      #l
+      # x[[outcome]] <- round2(x[[outcome]], 0)
     } 
     
     return(x)
   }
-}
\ No newline at end of file
+}
diff --git a/R/concurrent-methods.R b/R/concurrent-methods.R
index 24341c2..dfdf2fb 100644
--- a/R/concurrent-methods.R
+++ b/R/concurrent-methods.R
@@ -183,7 +183,8 @@ concurrent_premodel <- function(model_simulation) {
     model_call=model$model_call,
     te=c(model_simulation$effect_magnitude1, model_simulation$effect_magnitude2),
     effect_direction=model_simulation$effect_direction,
-    concurrent=TRUE
+    concurrent=TRUE,
+    outcome = outcome
   )
   
   # if autoregressive, need to add lag for crude rate
diff --git a/R/no-confounding-methods.R b/R/no-confounding-methods.R
index 5f0faa0..592f783 100644
--- a/R/no-confounding-methods.R
+++ b/R/no-confounding-methods.R
@@ -45,11 +45,12 @@ noconf_sample <- function(single_simulation) {
   model_type = names(single_simulation$models)
   
   # If there is at least one did model
-  if(!any(model_type == "drdid")){
+  if(!any(model_type == "did")){
     outcomes <- unique(sapply(single_simulation$models, function(x) { optic::model_terms(x[["model_formula"]])[["lhs"]] }))
   }else{
-    outcomes <- as.character(single_simulation$models$drdid$model_args$yname)
+    outcomes <- as.character(single_simulation$models$did$model_args$yname)
   }
+  
   ###############################################
   ### IDENTIFY TREATED UNITS AND TIME PERIODS ###
   ###############################################
@@ -206,8 +207,15 @@ noconf_premodel <- function(model_simulation) {
   
   x <- model_simulation$data
   model <- model_simulation$models
-  outcome <- optic::model_terms(model$model_formula)[["lhs"]]
-  oo <- dplyr::sym(outcome)
+
+  if (model$type != "did") {
+    outcome <- optic::model_terms(model$model_formula)[["lhs"]]
+    oo <- dplyr::sym(outcome)
+  } else if (model$type=='did') {
+    outcome <- as.character(model_simulation$models$model_args$yname)
+    oo <- dplyr::sym(outcome)
+  }
+
   model_type <- model$type
   balance_statistics <- NULL
   
@@ -221,38 +229,34 @@ noconf_premodel <- function(model_simulation) {
     model_call=model$model_call,
     te=c(model_simulation$effect_magnitude),
     effect_direction=model_simulation$effect_direction,
-    concurrent=FALSE
+    concurrent=FALSE,
+    outcome = outcome
   )
+
+  unit_sym <- dplyr::sym(model_simulation$unit_var)
+  time_sym <- dplyr::sym(model_simulation$time_var)
   
   # PNL note
   # this implementation does not seem to use the lagged crude rate
   
   # if autoregressive, need to add lag for crude rate
   # when outcome is deaths, derive new crude rate from modified outcome
+  # if autoregressive, need to add lagged outcome
   if (model_type == "autoreg") {
     
-    # get lag of crude rate and add it to the model
-    unit_sym <- dplyr::sym(model_simulation$unit_var)
-    time_sym <- dplyr::sym(model_simulation$time_var)
-    
     x <- x %>%
       dplyr::arrange(!!unit_sym, !!time_sym) %>%
       dplyr::group_by(!!unit_sym) %>%
       dplyr::mutate(lag_outcome = dplyr::lag(!!oo, n=1L)) %>%
       dplyr::ungroup()
     
-    if(model$model_call == "feols"){
       formula_components <- as.character(model_simulation$models$model_formula)
       updated_3 <- strsplit(formula_components[3], " | ", fixed=TRUE)
-      if(length(updated_3[[1]]==1)){
-        new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome"))
-      }else{
-        new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome |", updated_3[[1]][2]))
-      }
+      
+      new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][[1]], "+ lag_outcome"))
+    
       model_simulation$models$model_formula <- new_fmla
-    }else{
-      model_simulation$models$model_formula <- update.formula(model_simulation$models$model_formula, ~ . + lag_outcome)
-    }
+   
   } else if (model_type == "multisynth") {
     x$treatment[x$treatment > 0] <- 1
     x$treatment_level[x$treatment_level > 0] <- 1
@@ -261,9 +265,11 @@ noconf_premodel <- function(model_simulation) {
     if (sum(is.na(x[[outcome]])) > 0) {
       stop("multisynth method cannot handle missingness in outcome.")
     }
-  } else if (model_type == "drdid") {
+  } else if (model_type == "did") {
+    
     x$treatment[x$treatment > 0] <- 1
     x$treatment_level[x$treatment_level > 0] <- 1
+
   }
   
   # get balance information
@@ -305,6 +311,53 @@ noconf_premodel <- function(model_simulation) {
               mu0 = mean((!!oo)[treatment == 0], na.rm=T),
               sd = sd(!!oo, na.rm=T))
   bal_stats = bind_cols(bal_stats, bal_stats2)
+
+  # Add on first year for treatment, which we will use to define
+  # treatment status across models.
+  
+  x <- x %>% 
+    group_by(!!unit_sym) %>%
+    mutate(treatment_date = ifelse(max(trt_ind == 1), max(year ^ (1-treatment)), 0)) %>% 
+    ungroup()
+
+  # Depending on the model, we may need to recode treatment year or
+  # treatment date:
+
+  if (model_type == "reg"|model_type == "autoreg"){
+    
+    # Sun and Abraham models expect treatment year to be Inf, if untreated:
+    x <- x %>% 
+      mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date))
+    
+  }else if (model_type == "did"){
+    
+    # CSA models expect treatment year to be Inf, if untreated and expects
+    # location to be a numeric variable
+    x <- x %>% 
+      mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date),
+             !!unit_sym := as.numeric(as.factor(!!unit_sym)))
+    
+  }else if (model_type == "multisynth"){
+    
+    # Multisynth expects treatment to be either a 0/1 (sampling process uses this
+    # variable instrumentally to produce draws of treatment effect. So reset to 
+    # binary)
+    
+    x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))
+    
+  }else if (model_type == "did_imputation"){
+    
+    # I added this section for completeness, in case we want to modify later.
+    #Borusyak, Jaravel, and Spiess expects treatment_date to be zero for
+    # untreated units. So no need to change data coding.
+    
+  }else if (model_type == "did2s"){
+    
+    # Similar to multisynth, we need treatment to again be binary for
+    # the first stage 
+    x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))
+    
+  }
   
   model_simulation$balance_statistics <- bal_stats
   model_simulation$data <- x
@@ -329,14 +382,30 @@ noconf_premodel <- function(model_simulation) {
 #' @noRd
 noconf_model <- function(model_simulation) {
   model <- model_simulation$models
+  model_type <- model_simulation$models$type
+  addtl_args <- model$model_args
+  
   x <- model_simulation$data
   
-  if (model_simulation$models$name == "drdid") {
-    args = c(list(data=x), model[["model_args"]])
-  } else if(model$model_call == "feols"){
-    args = c(list(data=x, fml=model[["model_formula"]]), model[["model_args"]], notes=FALSE)
-  }else {
-    args = c(list(data=x, formula=model[["model_formula"]]), model[["model_args"]])
+  # The only consistent argument across these
+  # models is "data". We need to also pass along user provided arguments
+  # and set some arguments based off the specific model type 
+  
+  args <- list(data=x)
+  
+  if (length(addtl_args) >= 1){
+    args <- append(args, addtl_args)
+  }
+  
+  # Change names of provided arguments to meet the needs of respective packages
+  if (model_type == "reg"|model_type == "autoreg"){
+    args[['formula']] <- model$model_formula
+  }else if (model_type == "multisynth"){
+    args[['form']] <-  model$model_formula
+  }else if (model_type == "did_imputation"){
+    args[['horizon']] <- T
+  }else if (model_type == "did2s"){
+    args[['treatment']] <- 'treatment'
   }
   
   m <- do.call(
@@ -361,7 +430,12 @@ noconf_model <- function(model_simulation) {
 #' @importFrom stats resid
 #' @noRd
 noconf_postmodel <- function(model_simulation) {
-  outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]
+
+  if (model_simulation$models[["type"]] == "did"){
+    outcome <- as.character(model_simulation$models$model_args$yname)
+  }else{
+    outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]
+  }
   # get run metadata to merge in after
   meta_data <- data.frame(
     model_name = model_simulation$models$name,
@@ -375,10 +449,11 @@ noconf_postmodel <- function(model_simulation) {
     n_units=model_simulation$n_units,
     effect_direction=model_simulation$effect_direction  
   )
+
+  m <- model_simulation$model_result
   
   # get model result information and apply standard error adjustments
-  if (model_simulation$models[["type"]] != "multisynth") {
-    m <- model_simulation$model_result
+  if (model_simulation$models[["type"]] == "reg"|model_simulation$models[["type"]] == "autoreg"|model_simulation$models[["type"]] == "did2s") {
     if(model_simulation$models$model_call=="feols"){
       cf <- as.data.frame(summary(m)$coeftable)
     } else{
@@ -413,7 +488,31 @@ noconf_postmodel <- function(model_simulation) {
       mse=mean(resid(m)^2, na.rm=T),
       stringsAsFactors=FALSE
     )
-  } else {
+  } else if (model_simulation$models[["type"]] == "did"){
+    m_agg <- did::aggte(m, type='group', na.rm=T)
+
+    cf <- data.frame("estimate" = m_agg$overall.att,
+                     "se" = m_agg$overall.se)
+          
+    estimate <- cf$estimate
+    se <- cf$se
+    variance <- se ^ 2
+    t_stat <- NA
+    p_value <- 2 * pnorm(abs(estimate/se), lower.tail = FALSE)
+
+    results <- data.frame(
+        outcome=outcome,
+        se_adjustment="none",
+        estimate=estimate,
+        se=se,
+        variance=variance,
+        t_stat=NA,
+        p_value=p_value,
+        mse = NA,
+        stringsAsFactors=FALSE
+      )
+  }
+  else{
     m <- model_simulation$model_result
     cf <- summary(m)
     cf <- cf$att
@@ -588,9 +687,9 @@ noconf_postmodel <- function(model_simulation) {
   }
   
   results <- left_join(results, meta_data, by="outcome")
-  results <- left_join(results, model_simulation$balance_statistics, by="outcome")
   
   return(results)
+
 }
 
 
diff --git a/R/optic-model-class.R b/R/optic-model-class.R
index 7d9615d..4245991 100644
--- a/R/optic-model-class.R
+++ b/R/optic-model-class.R
@@ -67,7 +67,7 @@ optic_model <- function(name, type,call, formula, se_adjust, ...) {
 # Constructor:
 new_optic_model <- function(
                   name,
-                  type= c("reg", "autoreg", "multisynth", "drdid"),
+                  type= c("reg", "autoreg", "multisynth", "did"),
                   call= c("lm", "feols", "multisynth", "glm.nb"),
                   formula,
                   args=list(weights=as.name('population')),
diff --git a/man/optic_model.Rd b/man/optic_model.Rd
index e5a4ad6..606ed50 100644
--- a/man/optic_model.Rd
+++ b/man/optic_model.Rd
@@ -9,7 +9,7 @@ optic_model(name, type, call, formula, se_adjust, ...)
 \arguments{
 \item{name}{Name of the model object, used to identify the model when reviewing simulation results}
 
-\item{type}{Estimator used to identify the treatment effect using simulated data. Specified as a string, which can either be 'reg' (regression), 'autoreg' (autoregression, which adds a lag for the outcome variable to a regression model), 'drdid' (doubly-robust difference-in-difference estimator), or 'multisynth' (augmented synthetic control)}
+\item{type}{Estimator used to identify the treatment effect using simulated data. Specified as a string, which can either be 'reg' (regression), 'autoreg' (autoregression, which adds a lag for the outcome variable to a regression model), 'did' (doubly-robust difference-in-difference estimator), or 'multisynth' (augmented synthetic control)}
 
 \item{call}{String which specifies the R function to call for applying the estimator. Package currently supports  either 'lm' (linear model), 'feols' (fixed-effect OLS), 'multisynth' (pooled synthetic controls), or 'glm.nb' (negative-binomial generalized nearlized linear model)}
 
diff --git a/vignettes/intro_optic.Rmd b/vignettes/intro_optic.Rmd
index 20995d8..3d3cef2 100644
--- a/vignettes/intro_optic.Rmd
+++ b/vignettes/intro_optic.Rmd
@@ -141,7 +141,7 @@ then attempt to estimate this effect based on user-provided models. The
 
 -   `name`: A name for the model to identify the model type in results.
 
--   `type`: Users can set this as either "autoreg", "drdid",
+-   `type`: Users can set this as either "autoreg", "did",
     "multisynth", or "reg".
 
     -   "reg" uses a typical regression framework, implementing the
@@ -149,7 +149,7 @@ then attempt to estimate this effect based on user-provided models. The
 
     -   "autoreg" adds a dependent lag to the model formula.
 
-    -   "drdid" estimates a treatment effect using a doubly-robust
+    -   "did" estimates a treatment effect using a doubly-robust
         difference-in-difference estimator, with covariates in the
         `model_formula` argument used within both the propensity score
         stage and  outcome modeling stage (for more details on doubly robust

From 479ed2e28cf8dcc66393c55f764e7f1994dcb364 Mon Sep 17 00:00:00 2001
From: maxgriswold <max.griswold.43@gmail.com>
Date: Wed, 15 Jan 2025 16:11:13 -0800
Subject: [PATCH 2/4] Incrementing minor version number

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f9242aa..95d4dcd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: optic
 Title: Simulation Tool for Causal Inference Using Longitudinal Data
-Version: 1.0.1
+Version: 1.1.1
 Authors@R: c(
     person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"),
            comment = c(ORCID = "0000-0002-2391-4601")),

From 25a85f7f44a37ef8e58dcf7aa2db8033cc9de96b Mon Sep 17 00:00:00 2001
From: maxgriswold <max.griswold.43@gmail.com>
Date: Wed, 15 Jan 2025 16:28:08 -0800
Subject: [PATCH 3/4] Updating references

Found issue in json file - there was a missing comma.
---
 vignettes/optic_refs.json | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vignettes/optic_refs.json b/vignettes/optic_refs.json
index 296123b..b8b81dc 100644
--- a/vignettes/optic_refs.json
+++ b/vignettes/optic_refs.json
@@ -139,6 +139,20 @@
 			]
 		}
 	},
+	{
+		"id":"griffin2023identifying",
+		"author":[{"family":"Griffin","given":"Beth Ann"},{"family":"Schuler","given":"Megan S"},{"family":"Stone","given":"Elizabeth M"},{"family":"Patrick","given":"Stephen W"},{"family":"Stein","given":"Bradley D"},{"family":"De Lima","given":"Pedro Nascimento"},{"family":"Griswold","given":"Max"},{"family":"Scherling","given":"Adam"},{"family":"Stuart","given":"Elizabeth A"}],
+		"citation-key":"griffin2023identifying",
+		"container-title":"Epidemiology (Cambridge, Mass.)",
+		"container-title-short":"Epidemiology",
+		"issue":"6",
+		"issued":{"date-parts":[["2023"]]},
+		"page":"856–864",
+		"publisher":"LWW",
+		"title":"Identifying optimal methods for addressing confounding bias when estimating the effects of state-level policies",
+		"type":"article-journal",
+		"URL":"https://journals.lww.com/epidem/abstract/2023/11000/identifying_optimal_methods_for_addressing.14.aspx","volume":"34"
+	},
 	{
 		"id": "ben-michaelAugmentedSyntheticControl2021",
 		"type": "article-journal",

From c013e3b55d8b7eace36625c9b85824c98bf9a1bc Mon Sep 17 00:00:00 2001
From: maxgriswold <max.griswold.43@gmail.com>
Date: Thu, 16 Jan 2025 09:43:20 -0800
Subject: [PATCH 4/4] Mod version number

Changing to patch iteration rather than minor versioning.
---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 95d4dcd..86f247a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: optic
 Title: Simulation Tool for Causal Inference Using Longitudinal Data
-Version: 1.1.1
+Version: 1.0.2
 Authors@R: c(
     person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"),
            comment = c(ORCID = "0000-0002-2391-4601")),