Skip to content

Commit

Permalink
Merge pull request #28 from RANDCorporation/fix_issue_27
Browse files Browse the repository at this point in the history
Fix issue #27
pedroliman authored Jan 16, 2025
2 parents 43acc6a + c013e3b commit 140cdf5
Showing 8 changed files with 156 additions and 42 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: optic
Title: Simulation Tool for Causal Inference Using Longitudinal Data
Version: 1.0.1
Version: 1.0.2
Authors@R: c(
person("Beth Ann", "Griffin", , "bethg@rand.org", role = c("aut", "cph"),
comment = c(ORCID = "0000-0002-2391-4601")),
12 changes: 6 additions & 6 deletions R/apply-treatment-effect.R
Original file line number Diff line number Diff line change
@@ -16,12 +16,10 @@
#' @param concurrent bool for whether this is concurrent run or not
#'
#' @noRd
apply_treatment_effect <- function(x, model_formula, model_call, te, effect_direction, concurrent) {
# identify outcome
outcome <- model_terms(model_formula)[["lhs"]]
apply_treatment_effect <- function(x, model_formula, outcome, model_call, te, effect_direction, concurrent){

# identify additive or multiplicative modification of outcome required
if (model_call == "lm" | model_call == "feols" | model_call == "multisynth" | model_call == 'lmer') {
if (model_call != "glm.nb") {
modifier <- "additive"
} else if (model_call == "glm.nb") {
modifier <- "multiplicative"
@@ -46,9 +44,11 @@ apply_treatment_effect <- function(x, model_formula, model_call, te, effect_dire
} else {
x[[outcome]] <- x[[outcome]] + (x[[outcome]] * te * x[["treatment"]])
}
x[[outcome]] <- round2(x[[outcome]], 0)

#l
# x[[outcome]] <- round2(x[[outcome]], 0)
}

return(x)
}
}
}
3 changes: 2 additions & 1 deletion R/concurrent-methods.R
Original file line number Diff line number Diff line change
@@ -183,7 +183,8 @@ concurrent_premodel <- function(model_simulation) {
model_call=model$model_call,
te=c(model_simulation$effect_magnitude1, model_simulation$effect_magnitude2),
effect_direction=model_simulation$effect_direction,
concurrent=TRUE
concurrent=TRUE,
outcome = outcome
)

# if autoregressive, need to add lag for crude rate
159 changes: 129 additions & 30 deletions R/no-confounding-methods.R
Original file line number Diff line number Diff line change
@@ -45,11 +45,12 @@ noconf_sample <- function(single_simulation) {
model_type = names(single_simulation$models)

# If there is at least one did model
if(!any(model_type == "drdid")){
if(!any(model_type == "did")){
outcomes <- unique(sapply(single_simulation$models, function(x) { optic::model_terms(x[["model_formula"]])[["lhs"]] }))
}else{
outcomes <- as.character(single_simulation$models$drdid$model_args$yname)
outcomes <- as.character(single_simulation$models$did$model_args$yname)
}

###############################################
### IDENTIFY TREATED UNITS AND TIME PERIODS ###
###############################################
@@ -206,8 +207,15 @@ noconf_premodel <- function(model_simulation) {

x <- model_simulation$data
model <- model_simulation$models
outcome <- optic::model_terms(model$model_formula)[["lhs"]]
oo <- dplyr::sym(outcome)

if (model$type != "did") {
outcome <- optic::model_terms(model$model_formula)[["lhs"]]
oo <- dplyr::sym(outcome)
} else if (model$type=='did') {
outcome <- as.character(model_simulation$models$model_args$yname)
oo <- dplyr::sym(outcome)
}

model_type <- model$type
balance_statistics <- NULL

@@ -221,38 +229,34 @@ noconf_premodel <- function(model_simulation) {
model_call=model$model_call,
te=c(model_simulation$effect_magnitude),
effect_direction=model_simulation$effect_direction,
concurrent=FALSE
concurrent=FALSE,
outcome = outcome
)

unit_sym <- dplyr::sym(model_simulation$unit_var)
time_sym <- dplyr::sym(model_simulation$time_var)

# PNL note
# this implementation does not seem to use the lagged crude rate

# if autoregressive, need to add lag for crude rate
# when outcome is deaths, derive new crude rate from modified outcome
# if autoregressive, need to add lagged outcome
if (model_type == "autoreg") {

# get lag of crude rate and add it to the model
unit_sym <- dplyr::sym(model_simulation$unit_var)
time_sym <- dplyr::sym(model_simulation$time_var)

x <- x %>%
dplyr::arrange(!!unit_sym, !!time_sym) %>%
dplyr::group_by(!!unit_sym) %>%
dplyr::mutate(lag_outcome = dplyr::lag(!!oo, n=1L)) %>%
dplyr::ungroup()

if(model$model_call == "feols"){
formula_components <- as.character(model_simulation$models$model_formula)
updated_3 <- strsplit(formula_components[3], " | ", fixed=TRUE)
if(length(updated_3[[1]]==1)){
new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome"))
}else{
new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][1], "+ lag_outcome |", updated_3[[1]][2]))
}

new_fmla <- as.formula(paste(formula_components[2], formula_components[1], updated_3[[1]][[1]], "+ lag_outcome"))

model_simulation$models$model_formula <- new_fmla
}else{
model_simulation$models$model_formula <- update.formula(model_simulation$models$model_formula, ~ . + lag_outcome)
}

} else if (model_type == "multisynth") {
x$treatment[x$treatment > 0] <- 1
x$treatment_level[x$treatment_level > 0] <- 1
@@ -261,9 +265,11 @@ noconf_premodel <- function(model_simulation) {
if (sum(is.na(x[[outcome]])) > 0) {
stop("multisynth method cannot handle missingness in outcome.")
}
} else if (model_type == "drdid") {
} else if (model_type == "did") {

x$treatment[x$treatment > 0] <- 1
x$treatment_level[x$treatment_level > 0] <- 1

}

# get balance information
@@ -305,6 +311,53 @@ noconf_premodel <- function(model_simulation) {
mu0 = mean((!!oo)[treatment == 0], na.rm=T),
sd = sd(!!oo, na.rm=T))
bal_stats = bind_cols(bal_stats, bal_stats2)

# Add on first year for treatment, which we will use to define
# treatment status across models.

x <- x %>%
group_by(!!unit_sym) %>%
mutate(treatment_date = ifelse(max(trt_ind == 1), max(year ^ (1-treatment)), 0)) %>%
ungroup()

# Depending on the model, we may need to recode treatment year or
# treatment date:

if (model_type == "reg"|model_type == "autoreg"){

# Sun and Abraham models expect treatment year to be Inf, if untreated:
x <- x %>%
mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date))

}else if (model_type == "did"){

# CSA models expect treatment year to be Inf, if untreated and expects
# location to be a numeric variable
x <- x %>%
mutate(treatment_date = ifelse(treatment_date == 0, Inf, treatment_date),
!!unit_sym := as.numeric(as.factor(!!unit_sym)))

}else if (model_type == "multisynth"){

# Multisynth expects treatment to be either a 0/1 (sampling process uses this
# variable instrumentally to produce draws of treatment effect. So reset to
# binary)

x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))

}else if (model_type == "did_imputation"){

# I added this section for completeness, in case we want to modify later.
#Borusyak, Jaravel, and Spiess expects treatment_date to be zero for
# untreated units. So no need to change data coding.

}else if (model_type == "did2s"){

# Similar to multisynth, we need treatment to again be binary for
# the first stage
x <- x %>% mutate(treatment = ifelse(treatment > 0, 1, 0))

}

model_simulation$balance_statistics <- bal_stats
model_simulation$data <- x
@@ -329,14 +382,30 @@ noconf_premodel <- function(model_simulation) {
#' @noRd
noconf_model <- function(model_simulation) {
model <- model_simulation$models
model_type <- model_simulation$models$type
addtl_args <- model$model_args

x <- model_simulation$data

if (model_simulation$models$name == "drdid") {
args = c(list(data=x), model[["model_args"]])
} else if(model$model_call == "feols"){
args = c(list(data=x, fml=model[["model_formula"]]), model[["model_args"]], notes=FALSE)
}else {
args = c(list(data=x, formula=model[["model_formula"]]), model[["model_args"]])
# The only consistent argument across these
# models is "data". We need to also pass along user provided arguments
# and set some arguments based off the specific model type

args <- list(data=x)

if (length(addtl_args) >= 1){
args <- append(args, addtl_args)
}

# Change names of provided arguments to meet the needs of respective packages
if (model_type == "reg"|model_type == "autoreg"){
args[['formula']] <- model$model_formula
}else if (model_type == "multisynth"){
args[['form']] <- model$model_formula
}else if (model_type == "did_imputation"){
args[['horizon']] <- T
}else if (model_type == "did2s"){
args[['treatment']] <- 'treatment'
}

m <- do.call(
@@ -361,7 +430,12 @@ noconf_model <- function(model_simulation) {
#' @importFrom stats resid
#' @noRd
noconf_postmodel <- function(model_simulation) {
outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]

if (model_simulation$models[["type"]] == "did"){
outcome <- as.character(model_simulation$models$model_args$yname)
}else{
outcome <- optic::model_terms(model_simulation$models[["model_formula"]])[["lhs"]]
}
# get run metadata to merge in after
meta_data <- data.frame(
model_name = model_simulation$models$name,
@@ -375,10 +449,11 @@ noconf_postmodel <- function(model_simulation) {
n_units=model_simulation$n_units,
effect_direction=model_simulation$effect_direction
)

m <- model_simulation$model_result

# get model result information and apply standard error adjustments
if (model_simulation$models[["type"]] != "multisynth") {
m <- model_simulation$model_result
if (model_simulation$models[["type"]] == "reg"|model_simulation$models[["type"]] == "autoreg"|model_simulation$models[["type"]] == "did2s") {
if(model_simulation$models$model_call=="feols"){
cf <- as.data.frame(summary(m)$coeftable)
} else{
@@ -413,7 +488,31 @@ noconf_postmodel <- function(model_simulation) {
mse=mean(resid(m)^2, na.rm=T),
stringsAsFactors=FALSE
)
} else {
} else if (model_simulation$models[["type"]] == "did"){
m_agg <- did::aggte(m, type='group', na.rm=T)

cf <- data.frame("estimate" = m_agg$overall.att,
"se" = m_agg$overall.se)

estimate <- cf$estimate
se <- cf$se
variance <- se ^ 2
t_stat <- NA
p_value <- 2 * pnorm(abs(estimate/se), lower.tail = FALSE)

results <- data.frame(
outcome=outcome,
se_adjustment="none",
estimate=estimate,
se=se,
variance=variance,
t_stat=NA,
p_value=p_value,
mse = NA,
stringsAsFactors=FALSE
)
}
else{
m <- model_simulation$model_result
cf <- summary(m)
cf <- cf$att
@@ -588,9 +687,9 @@ noconf_postmodel <- function(model_simulation) {
}

results <- left_join(results, meta_data, by="outcome")
results <- left_join(results, model_simulation$balance_statistics, by="outcome")

return(results)

}


2 changes: 1 addition & 1 deletion R/optic-model-class.R
Original file line number Diff line number Diff line change
@@ -67,7 +67,7 @@ optic_model <- function(name, type,call, formula, se_adjust, ...) {
# Constructor:
new_optic_model <- function(
name,
type= c("reg", "autoreg", "multisynth", "drdid"),
type= c("reg", "autoreg", "multisynth", "did"),
call= c("lm", "feols", "multisynth", "glm.nb"),
formula,
args=list(weights=as.name('population')),
2 changes: 1 addition & 1 deletion man/optic_model.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions vignettes/intro_optic.Rmd
Original file line number Diff line number Diff line change
@@ -141,15 +141,15 @@ then attempt to estimate this effect based on user-provided models. The

- `name`: A name for the model to identify the model type in results.

- `type`: Users can set this as either "autoreg", "drdid",
- `type`: Users can set this as either "autoreg", "did",
"multisynth", or "reg".

- "reg" uses a typical regression framework, implementing the
procedure chosen in `model_call`.

- "autoreg" adds a dependent lag to the model formula.

- "drdid" estimates a treatment effect using a doubly-robust
- "did" estimates a treatment effect using a doubly-robust
difference-in-difference estimator, with covariates in the
`model_formula` argument used within both the propensity score
stage and outcome modeling stage (for more details on doubly robust
14 changes: 14 additions & 0 deletions vignettes/optic_refs.json
Original file line number Diff line number Diff line change
@@ -139,6 +139,20 @@
]
}
},
{
"id":"griffin2023identifying",
"author":[{"family":"Griffin","given":"Beth Ann"},{"family":"Schuler","given":"Megan S"},{"family":"Stone","given":"Elizabeth M"},{"family":"Patrick","given":"Stephen W"},{"family":"Stein","given":"Bradley D"},{"family":"De Lima","given":"Pedro Nascimento"},{"family":"Griswold","given":"Max"},{"family":"Scherling","given":"Adam"},{"family":"Stuart","given":"Elizabeth A"}],
"citation-key":"griffin2023identifying",
"container-title":"Epidemiology (Cambridge, Mass.)",
"container-title-short":"Epidemiology",
"issue":"6",
"issued":{"date-parts":[["2023"]]},
"page":"856–864",
"publisher":"LWW",
"title":"Identifying optimal methods for addressing confounding bias when estimating the effects of state-level policies",
"type":"article-journal",
"URL":"https://journals.lww.com/epidem/abstract/2023/11000/identifying_optimal_methods_for_addressing.14.aspx","volume":"34"
},
{
"id": "ben-michaelAugmentedSyntheticControl2021",
"type": "article-journal",

0 comments on commit 140cdf5

Please sign in to comment.