diff --git a/analysis/codelists.py b/analysis/codelists.py
index cf7c97df..bceeb96b 100644
--- a/analysis/codelists.py
+++ b/analysis/codelists.py
@@ -746,4 +746,18 @@
     anxiety_icd10,
     ocd_icd10,
     ptsd_icd10
+)
+
+# COCP (combined oral contraceptive pill) medications - dm+d codelist
+cocp_dmd = codelist_from_csv(
+    "codelists/user-elsie_horne-cocp_dmd.csv",
+    system="snomed",
+    column="dmd_id",
+)
+
+# HRT (hormone replacement therapy) medications - dm+d codelist
+hrt_dmd = codelist_from_csv(
+    "codelists/user-elsie_horne-hrt_dmd.csv",
+    system="snomed",
+    column="dmd_id",
 )
\ No newline at end of file
diff --git a/analysis/common_variables.py b/analysis/common_variables.py
index d31c8740..df2e1377 100644
--- a/analysis/common_variables.py
+++ b/analysis/common_variables.py
@@ -28,7 +28,7 @@
 pandemic_start = study_dates["pandemic_start"]
 
 # Define common variables function
-def generate_common_variables(index_date_variable,end_date_variable):
+def generate_common_variables(index_date_variable,exposure_end_date_variable,outcome_end_date_variable):
 
     dynamic_variables = dict(
     
@@ -43,7 +43,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                     returning="date",
                     find_first_match_in_period=True,
                     date_format="YYYY-MM-DD",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{exposure_end_date_variable}"],
                     return_expectations={
                         "date": {"earliest": study_dates["pandemic_start"], "latest" : "today"},
                         "rate": "uniform",
@@ -59,7 +59,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                         covid_primary_care_sequalae,
                     ),
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{exposure_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -73,7 +73,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_exp_date_covid19_confirmed_hes=patients.admitted_to_hospital(
                     with_these_diagnoses=covid_codes,
                     returning="date_admitted",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{exposure_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -87,7 +87,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_exp_date_covid19_confirmed_death=patients.with_these_codes_on_death_certificate(
                     covid_codes,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{exposure_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -125,7 +125,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
 
             ## Deregistraton date
                 dereg_date=patients.date_deregistered_from_all_supported_practices( 
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format = 'YYYY-MM-DD',
                     return_expectations={
                     "date": {"earliest": study_dates["pandemic_start"], "latest": "today"},
@@ -194,7 +194,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_depression_snomed=patients.with_these_clinical_events(
                     depression_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -208,7 +208,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_depression_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=depression_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -222,7 +222,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_depression_death=patients.with_these_codes_on_death_certificate(
                     depression_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -243,7 +243,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_general_snomed=patients.with_these_clinical_events(
                     anxiety_general_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -257,7 +257,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_general_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=anxiety_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -271,7 +271,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_general_death=patients.with_these_codes_on_death_certificate(
                     anxiety_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -292,7 +292,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_ptsd_snomed=patients.with_these_clinical_events(
                     anxiety_ptsd_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -306,7 +306,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_ptsd_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=ptsd_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -320,7 +320,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_anxiety_ptsd_death=patients.with_these_codes_on_death_certificate(
                     ptsd_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -341,7 +341,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_eating_disorders_snomed=patients.with_these_clinical_events(
                     eating_disorders_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -355,7 +355,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_eating_disorders_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=eating_disorder_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -369,7 +369,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_eating_disorders_death=patients.with_these_codes_on_death_certificate(
                     eating_disorder_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -390,7 +390,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_serious_mental_illness_snomed=patients.with_these_clinical_events(
                     serious_mental_illness_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -404,7 +404,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_serious_mental_illness_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=serious_mental_illness_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -418,7 +418,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_serious_mental_illness_death=patients.with_these_codes_on_death_certificate(
                     serious_mental_illness_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -439,7 +439,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_self_harm_snomed=patients.with_these_clinical_events(
                     self_harm_15_10_combined_snomed,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -453,7 +453,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_self_harm_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=self_harm_15_10_combined_icd,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -467,7 +467,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_self_harm_death=patients.with_these_codes_on_death_certificate(
                     self_harm_15_10_combined_icd,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -488,7 +488,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_suicide_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=suicide_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -502,7 +502,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_suicide_death=patients.with_these_codes_on_death_certificate(
                     suicide_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -523,7 +523,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_addiction_snomed=patients.with_these_clinical_events(
                     addiction_snomed_clinical,
                     returning="date",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -537,7 +537,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_addiction_hes=patients.admitted_to_hospital(
                     returning="date_admitted",
                     with_these_diagnoses=opioid_misuse_icd10,
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     date_format="YYYY-MM-DD",
                     find_first_match_in_period=True,
                     return_expectations={
@@ -551,7 +551,7 @@ def generate_common_variables(index_date_variable,end_date_variable):
                 tmp_out_date_addiction_death=patients.with_these_codes_on_death_certificate(
                     opioid_misuse_icd10,
                     returning="date_of_death",
-                    between=[f"{index_date_variable}",f"{end_date_variable}"],
+                    between=[f"{index_date_variable}",f"{outcome_end_date_variable}"],
                     match_only_underlying_cause=True,
                     date_format="YYYY-MM-DD",
                     return_expectations={
@@ -1307,6 +1307,26 @@ def generate_common_variables(index_date_variable,end_date_variable):
                     },
                 ),
 
+            ## QA flag: any HRT or COCP medication recorded on or before the index date
+
+                tmp_cocp=patients.with_these_medications(  # binary flag: any medication from the cocp_dmd codelist
+                        cocp_dmd, 
+                        returning='binary_flag',
+                        on_or_before=f"{index_date_variable}",  # no lower bound: any record up to and including the index date
+                        return_expectations={"incidence": 0.1},  # expectation used when generating dummy data
+                    ),
+
+                tmp_hrt=patients.with_these_medications(  # binary flag: any medication from the hrt_dmd codelist
+                        hrt_dmd, 
+                        returning='binary_flag',
+                        on_or_before=f"{index_date_variable}",  # same look-back window as tmp_cocp
+                        return_expectations={"incidence": 0.1},  # expectation used when generating dummy data
+                    ),
+                
+                qa_bin_hrtcocp=patients.maximum_of(  # maximum of the two flags, i.e. set when either tmp_cocp or tmp_hrt is set
+                    "tmp_cocp", "tmp_hrt"
+                ),
+
         )
     
     return dynamic_variables
diff --git a/analysis/create_project_actions.R b/analysis/create_project_actions.R
index 4a11c6b5..fadce510 100644
--- a/analysis/create_project_actions.R
+++ b/analysis/create_project_actions.R
@@ -237,6 +237,17 @@ actions_list <- splice(
     )
   ),
   
+  comment("Implement study_definition for unvax_extf"),
+  
+  action(
+    name = "generate_study_population_unvax_extf",
+    run = "cohortextractor:latest generate_cohort --study-definition study_definition_unvax_extf --output-format csv.gz",
+    needs = list("vax_eligibility_inputs","generate_index_dates"),
+    highly_sensitive = list(
+      cohort = glue("output/input_unvax_extf.csv.gz")
+    )
+  ),
+  
   comment("Preprocess data - prevax"),
   
   action(
@@ -301,12 +312,28 @@ actions_list <- splice(
     )
   ),
   
+  comment("Preprocess data - unvax_extf"),
+  
+  action(
+    name = "preprocess_data_unvax_extf",
+    run = "r:latest analysis/preprocess_data.R unvax_extf",
+    needs = list("generate_index_dates", "generate_study_population_unvax_extf"),
+    moderately_sensitive = list(
+      describe = glue("output/describe_input_unvax_extf_stage0.txt"),
+      describe_venn = glue("output/describe_venn_unvax_extf.txt")
+    ),
+    highly_sensitive = list(
+      cohort = glue("output/input_unvax_extf.rds"),
+      venn = glue("output/venn_unvax_extf.rds")
+    )
+  ),
+  
   comment("Data cleaning - all cohorts"),
   
   action(
     name = "stage1_data_cleaning_all",
     run = "r:latest analysis/stage1_data_cleaning.R all",
-    needs = list("preprocess_data_prevax","preprocess_data_prevax_extf","preprocess_data_vax", "preprocess_data_unvax","vax_eligibility_inputs"),
+    needs = list("preprocess_data_prevax","preprocess_data_prevax_extf","preprocess_data_vax", "preprocess_data_unvax", "preprocess_data_unvax_extf"),
     moderately_sensitive = list(
       refactoring = glue("output/meta_data_factors_*.csv"),
       QA_rules = glue("output/QA_summary_*.csv"),
@@ -318,41 +345,41 @@ actions_list <- splice(
     )
   ),
   
-  action(
-    name = glue("describe_file-input_prevax_stage1"),
-    run = glue("r:latest analysis/describe_file.R input_prevax_stage1 rds"),
-    needs = list("stage1_data_cleaning_all"),
-    moderately_sensitive = list(
-      describe_model_input = glue("output/describe-input_prevax_stage1.txt")
-    )
-  ),
-  
-  action(
-    name = glue("describe_file-input_prevax_extf_stage1"),
-    run = glue("r:latest analysis/describe_file.R input_prevax_extf_stage1 rds"),
-    needs = list("stage1_data_cleaning_all"),
-    moderately_sensitive = list(
-      describe_model_input = glue("output/describe-input_prevax_extf_stage1.txt")
-    )
-  ),
-  
-  action(
-    name = glue("describe_file-input_vax_stage1"),
-    run = glue("r:latest analysis/describe_file.R input_vax_stage1 rds"),
-    needs = list("stage1_data_cleaning_all"),
-    moderately_sensitive = list(
-      describe_model_input = glue("output/describe-input_vax_stage1.txt")
-    )
-  ),
-  
-  action(
-    name = glue("describe_file-input_unvax_stage1"),
-    run = glue("r:latest analysis/describe_file.R input_unvax_stage1 rds"),
-    needs = list("stage1_data_cleaning_all"),
-    moderately_sensitive = list(
-      describe_model_input = glue("output/describe-input_unvax_stage1.txt")
-    )
-  ),
+  # action(
+  #   name = glue("describe_file-input_prevax_stage1"),
+  #   run = glue("r:latest analysis/describe_file.R input_prevax_stage1 rds"),
+  #   needs = list("stage1_data_cleaning_all"),
+  #   moderately_sensitive = list(
+  #     describe_model_input = glue("output/describe-input_prevax_stage1.txt")
+  #   )
+  # ),
+  # 
+  # action(
+  #   name = glue("describe_file-input_prevax_extf_stage1"),
+  #   run = glue("r:latest analysis/describe_file.R input_prevax_extf_stage1 rds"),
+  #   needs = list("stage1_data_cleaning_all"),
+  #   moderately_sensitive = list(
+  #     describe_model_input = glue("output/describe-input_prevax_extf_stage1.txt")
+  #   )
+  # ),
+  # 
+  # action(
+  #   name = glue("describe_file-input_vax_stage1"),
+  #   run = glue("r:latest analysis/describe_file.R input_vax_stage1 rds"),
+  #   needs = list("stage1_data_cleaning_all"),
+  #   moderately_sensitive = list(
+  #     describe_model_input = glue("output/describe-input_vax_stage1.txt")
+  #   )
+  # ),
+  # 
+  # action(
+  #   name = glue("describe_file-input_unvax_stage1"),
+  #   run = glue("r:latest analysis/describe_file.R input_unvax_stage1 rds"),
+  #   needs = list("stage1_data_cleaning_all"),
+  #   moderately_sensitive = list(
+  #     describe_model_input = glue("output/describe-input_unvax_stage1.txt")
+  #   )
+  # ),
   
   # comment("Stage 2 - Missing - Table 1 - all cohorts"),
   # 
diff --git a/analysis/make_model_input.R b/analysis/make_model_input.R
index c36f8c3e..6ef34e56 100644
--- a/analysis/make_model_input.R
+++ b/analysis/make_model_input.R
@@ -55,7 +55,8 @@ for (i in 1:nrow(active_analyses)) {
   
   input <- input[,unique(c("patient_id",
                            "index_date",
-                           "end_date",
+                           "end_date_exposure",
+                           "end_date_outcome",
                            active_analyses$exposure[i], 
                            active_analyses$outcome[i],
                            unlist(strsplit(active_analyses$strata[i], split = ";")),
@@ -75,8 +76,8 @@ for (i in 1:nrow(active_analyses)) {
                          "exp_date" = active_analyses$exposure[i])
   
   input <- input %>% 
-    dplyr::mutate(out_date = replace(out_date, which(out_date>end_date | out_date<index_date), NA),
-                  exp_date =  replace(exp_date, which(exp_date>end_date | exp_date<index_date), NA),
+    dplyr::mutate(out_date = replace(out_date, which(out_date>end_date_outcome | out_date<index_date), NA),
+                  exp_date =  replace(exp_date, which(exp_date>end_date_exposure | exp_date<index_date), NA),
                   sub_cat_covid19_hospital = replace(sub_cat_covid19_hospital, which(is.na(exp_date)),"no_infection"))
   
   # Update end date to be outcome date where applicable ------------------------
@@ -84,51 +85,8 @@ for (i in 1:nrow(active_analyses)) {
   
   input <- input %>% 
     dplyr::rowwise() %>% 
-    dplyr::mutate(end_date = min(end_date, out_date, na.rm = TRUE))
-  
-  # # Make three level history covariates ----------------------------------------
-  # print('Make three level history covariates')
-  # 
-  # input$cov_cat_priorhistory_depression <- dplyr::case_when(
-  #   input$cov_bin_history_depression==TRUE & input$cov_bin_recent_depression==TRUE ~ "recent",
-  #   input$cov_bin_history_depression==TRUE & input$cov_bin_recent_depression==FALSE ~ "notrecent",
-  #   input$cov_bin_history_depression==FALSE & input$cov_bin_recent_depression==TRUE ~ "recent",
-  #   input$cov_bin_history_depression==FALSE & input$cov_bin_recent_depression==FALSE ~ "none")
-  # input[,c("cov_bin_history_depression","cov_bin_recent_depression")] <- NULL
-  # input$cov_cat_priorhistory_depression <- as.factor(input$cov_cat_priorhistory_depression)
-  # 
-  # input$cov_cat_priorhistory_anxiety_general <- dplyr::case_when(
-  #       input$cov_bin_history_anxiety==TRUE & input$cov_bin_recent_anxiety==TRUE ~ "recent",
-  #       input$cov_bin_history_anxiety==TRUE & input$cov_bin_recent_anxiety==FALSE ~ "notrecent",
-  #       input$cov_bin_history_anxiety==FALSE & input$cov_bin_recent_anxiety==TRUE ~ "recent",
-  #       input$cov_bin_history_anxiety==FALSE & input$cov_bin_recent_anxiety==FALSE ~ "none")
-  # input[,c("cov_bin_history_anxiety","cov_bin_recent_anxiety")] <- NULL
-  # input$cov_cat_priorhistory_anxiety_general <- as.factor(input$cov_cat_priorhistory_anxiety_general)
-  # 
-  # input$cov_cat_priorhistory_eating_disorders <- dplyr::case_when(
-  #       input$cov_bin_history_eating_disorders==TRUE & input$cov_bin_recent_eating_disorders==TRUE ~ "recent",
-  #       input$cov_bin_history_eating_disorders==TRUE & input$cov_bin_recent_eating_disorders==FALSE ~ "notrecent",
-  #       input$cov_bin_history_eating_disorders==FALSE & input$cov_bin_recent_eating_disorders==TRUE ~ "recent",
-  #       input$cov_bin_history_eating_disorders==FALSE & input$cov_bin_recent_eating_disorders==FALSE ~ "none")
-  # input[,c("cov_bin_history_eating_disorders","cov_bin_recent_eating_disorders")] <- NULL
-  # input$cov_cat_priorhistory_eating_disorders <- as.factor(input$cov_cat_priorhistory_eating_disorders)
-  # 
-  # input$cov_cat_priorhistory_serious_mental_illness <- dplyr::case_when(
-  #       input$cov_bin_history_serious_mental_illness==TRUE & input$cov_bin_recent_serious_mental_illness==TRUE ~ "recent",
-  #       input$cov_bin_history_serious_mental_illness==TRUE & input$cov_bin_recent_serious_mental_illness==FALSE ~ "notrecent",
-  #       input$cov_bin_history_serious_mental_illness==FALSE & input$cov_bin_recent_serious_mental_illness==TRUE ~ "recent",
-  #       input$cov_bin_history_serious_mental_illness==FALSE & input$cov_bin_recent_serious_mental_illness==FALSE ~ "none")
-  # input[,c("cov_bin_history_serious_mental_illness","cov_bin_recent_serious_mental_illness")] <- NULL
-  # input$cov_cat_priorhistory_serious_mental_illness <- as.factor(input$cov_cat_priorhistory_serious_mental_illness)
-  # 
-  # input$cov_cat_priorhistory_self_harm <- dplyr::case_when(
-  #       input$cov_bin_history_self_harm==TRUE & input$cov_bin_recent_self_harm==TRUE ~ "recent",
-  #       input$cov_bin_history_self_harm==TRUE & input$cov_bin_recent_self_harm==FALSE ~ "notrecent",
-  #       input$cov_bin_history_self_harm==FALSE & input$cov_bin_recent_self_harm==TRUE ~ "recent",
-  #       input$cov_bin_history_self_harm==FALSE & input$cov_bin_recent_self_harm==FALSE ~ "none")
-  # input[,c("cov_bin_history_self_harm","cov_bin_recent_self_harm")] <- NULL
-  # input$cov_cat_priorhistory_self_harm <- as.factor(input$cov_cat_priorhistory_self_harm)
-
+    dplyr::mutate(end_date_outcome = min(end_date_outcome, out_date, na.rm = TRUE))
+  
   # Make model input: main -------------------------------------------------------
   
   if (active_analyses$analysis[i]=="main") {
@@ -155,11 +113,11 @@ for (i in 1:nrow(active_analyses)) {
     df <- input[input$sub_bin_covid19_confirmed_history==FALSE,]
     
     df <- df %>% 
-      dplyr::mutate(end_date = replace(end_date, which(sub_cat_covid19_hospital=="non_hospitalised"), exp_date-1),
+      dplyr::mutate(end_date_outcome = replace(end_date_outcome, which(sub_cat_covid19_hospital=="non_hospitalised"), exp_date-1),
                     exp_date = replace(exp_date, which(sub_cat_covid19_hospital=="non_hospitalised"), NA),
-                    out_date = replace(out_date, which(out_date>end_date), NA))
+                    out_date = replace(out_date, which(out_date>end_date_outcome), NA))
     
-    df <- df[df$end_date>=df$index_date,]
+    df <- df[df$end_date_outcome>=df$index_date,]
     
     df[,colnames(df)[grepl("sub_",colnames(df))]] <- NULL
     
@@ -179,11 +137,11 @@ for (i in 1:nrow(active_analyses)) {
     df <- input[input$sub_bin_covid19_confirmed_history==FALSE,]
     
     df <- df %>% 
-      dplyr::mutate(end_date = replace(end_date, which(sub_cat_covid19_hospital=="hospitalised"), exp_date-1),
+      dplyr::mutate(end_date_outcome = replace(end_date_outcome, which(sub_cat_covid19_hospital=="hospitalised"), exp_date-1),
                     exp_date = replace(exp_date, which(sub_cat_covid19_hospital=="hospitalised"), NA),
-                    out_date = replace(out_date, which(out_date>end_date), NA))
+                    out_date = replace(out_date, which(out_date>end_date_outcome), NA))
     
-    df <- df[df$end_date>=df$index_date,]
+    df <- df[df$end_date_outcome>=df$index_date,]
     df$index_date <- as.Date(df$index_date)
     
     df[,colnames(df)[grepl("sub_",colnames(df))]] <- NULL
diff --git a/analysis/prelim.R b/analysis/prelim.R
index 770887fb..8b0945c9 100644
--- a/analysis/prelim.R
+++ b/analysis/prelim.R
@@ -20,15 +20,17 @@ prelim_data <- prelim_data %>%
                   unit = "days"))) %>%
   mutate(vax_date_covid_2_offset = vax_date_covid_2 + days(efficacy_offset),
          vax_date_eligible_offset = vax_date_eligible + days(eligibility_offset),
-         index_prevax = as.Date(study_dates$pandemic_start)) %>% 
+         index_prevax = as.Date(study_dates$pandemic_start),
+         index_prevax_extf = as.Date(study_dates$pandemic_start)) %>% 
   rowwise() %>%             
   mutate(index_vax = max(c(vax_date_covid_2_offset, delta_date), na.rm=T),
          index_unvax =  max(c(vax_date_eligible_offset, delta_date), na.rm=T),
+         index_unvax_extf =  max(c(vax_date_eligible_offset, delta_date), na.rm=T),
          end_vax = min(c(death_date, delta_end_date), na.rm=T),
-         end_unvax = min(c(death_date, delta_end_date), na.rm=T),
-         end_prevax = min(c(vax_date_eligible,death_date, vax_date_covid_1, all_eligible_date), na.rm=T),
-         end_prevax_exf = min(c(death_date, vax_date_covid_1, na.rm=T))) 
-
+         end_unvax = min(c(death_date, delta_end_date, vax_date_covid_1), na.rm=T),
+         end_unvax_extf = min(c(death_date, delta_end_date), na.rm=T),
+         end_prevax = min(c(vax_date_eligible, death_date, vax_date_covid_1, all_eligible_date), na.rm=T),
+         end_prevax_extf = min(c(death_date, delta_end_date), na.rm=T)) 
 
 #Write data to csv file 
 write_csv(prelim_data, "output/index_dates.csv.gz")
\ No newline at end of file
diff --git a/analysis/preprocess_data.R b/analysis/preprocess_data.R
index 8859ebbc..cbab3497 100644
--- a/analysis/preprocess_data.R
+++ b/analysis/preprocess_data.R
@@ -45,7 +45,7 @@ df <- df %>%
 
 if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations") &&
    cohort_name %in% c("vax")) {
-  source("analysis/preprocess/modify_dummy_vax_data.R")
+  source("analysis/modify_dummy_vax_data.R")
   message("Vaccine information overwritten successfully")
 }
 
diff --git a/analysis/stage1_data_cleaning.R b/analysis/stage1_data_cleaning.R
index fcb094eb..98d2f058 100644
--- a/analysis/stage1_data_cleaning.R
+++ b/analysis/stage1_data_cleaning.R
@@ -15,26 +15,26 @@ library(arrow)
 args <- commandArgs(trailingOnly=TRUE)
 
 if(length(args)==0){
-  cohort_name <- "prevax"
+  cohort_name <- "unvax"
 } else {
   cohort_name <- args[[1]]
 }
 
-# Load json file containing vax study dates ------------------------------------
-
-study_dates <- fromJSON("output/study_dates.json")
-
-# Specify relevant dates -------------------------------------------------------
-
-vax_start_date <- as.Date(study_dates$vax1_earliest, format="%Y-%m-%d")
-mixed_vax_threshold <- as.Date("2021-05-07")
-start_date_delta <- as.Date(study_dates$delta_date, format="%Y-%m-%d")
-end_date_delta <- as.Date(study_dates$omicron_date, format="%Y-%m-%d") 
-
 # Define stage 1 function ------------------------------------------------------
 
 stage1 <- function(cohort_name) {
   
+  # Load json file containing vax study dates ------------------------------------
+  
+  study_dates <- fromJSON("output/study_dates.json")
+  
+  # Specify relevant dates -----------------------------------------------------
+  
+  vax_start_date <- as.Date(study_dates$vax1_earliest, format="%Y-%m-%d")
+  mixed_vax_threshold <- as.Date("2021-05-07")
+  start_date_delta <- as.Date(study_dates$delta_date, format="%Y-%m-%d")
+  end_date_delta <- as.Date(study_dates$omicron_date, format="%Y-%m-%d") 
+  
   ## Load cohort data ----------------------------------------------------------
   
   input <- read_rds(file.path("output", paste0("input_",cohort_name,".rds")))
@@ -42,9 +42,7 @@ stage1 <- function(cohort_name) {
   
   ## Rename date variables -----------------------------------------------------
   
-  input <- input %>%
-    rename(index_date =!!sym(paste0("index_date_",cohort_name))) %>%
-    rename(end_date = !!sym(paste0("end_date_",cohort_name)))
+  input <- dplyr::rename(input, "index_date" = "index_date_cohort")
   
   ## Handle missing values -----------------------------------------------------
   
@@ -157,10 +155,8 @@ stage1 <- function(cohort_name) {
   
   ### Rule 5: HRT or COCP meds for men
   
-  input$rule5 <- FALSE
-  # input$rule5 <- NA
-  # input$rule5 <- ((input$cov_cat_sex=="Male" & input$qa_bin_hrt==TRUE) | 
-  #                   (input$cov_cat_sex=="Male" & input$qa_bin_cocp==TRUE))
+  input$rule5 <- NA
+ input$rule5 <- (input$cov_cat_sex=="Male" & input$qa_bin_hrtcocp==TRUE)
   
   ### Rule 6: Prostate cancer codes for women
   
@@ -325,7 +321,7 @@ stage1 <- function(cohort_name) {
     input <- input %>% filter (!is.na(index_date) & index_date <= end_date & index_date >= start_date_delta)
     cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 13 (Inclusion): Patient index date is within the study start and end dates i.e patient is fully vaccinated before the study end date")
     
-  } else if (cohort_name == "unvax"){
+  } else if (cohort_name %in% c("unvax","unvax_extf")){
     
     ### Exclusion criteria 8: Have a record of one or more vaccination prior index date
     # i.e. Have a record of a first vaccination prior to index date
@@ -344,7 +340,7 @@ stage1 <- function(cohort_name) {
     cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 9 (Exclusion): Missing or unknown JCVI group")
     
     ### Inclusion criteria 10: Index date is before cohort end date - will remove anyone whose eligibility date + 84 days is after study end date (only those with unknown JCVI group)
-    input <- input %>% filter (!is.na(index_date) & index_date <= end_date & index_date >= start_date_delta)
+    input <- input %>% filter (!is.na(index_date) & index_date <= end_date_exposure & index_date >= start_date_delta)
     cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 10 (Inclusion): Patient index date is within the study start and end dates i.e patients eligibility date + 84 days is before the study end date")
     
   }
@@ -399,6 +395,7 @@ if (cohort_name == "all") {
   stage1("prevax_extf")
   stage1("vax")
   stage1("unvax")
+  stage1("unvax_extf")
 } else{
   stage1(cohort_name)
 }
\ No newline at end of file
diff --git a/analysis/study_definition_prevax.py b/analysis/study_definition_prevax.py
index 623bc453..224c255e 100644
--- a/analysis/study_definition_prevax.py
+++ b/analysis/study_definition_prevax.py
@@ -27,7 +27,7 @@
 from common_variables import generate_common_variables
 (
     dynamic_variables
-) = generate_common_variables(index_date_variable="index_date_prevax", end_date_variable="end_date_prevax")
+) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome")
 
 ## Variables for deriving JCVI groups
 from grouping_variables import (
@@ -41,13 +41,19 @@
 study = StudyDefinition(
 
     # Specify study dates
-        index_date_prevax = patients.with_value_from_file(
+        index_date_cohort = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'index_prevax', 
             returning_type = 'date', 
             date_format = 'YYYY-MM-DD',
         ),
-        end_date_prevax = patients.with_value_from_file(
+        end_date_exposure = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz', 
+            returning = 'end_prevax', 
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',     
+        ),
+        end_date_outcome = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'end_prevax', 
             returning_type = 'date', 
diff --git a/analysis/study_definition_prevax_extf.py b/analysis/study_definition_prevax_extf.py
index 623bc453..4353b8ec 100644
--- a/analysis/study_definition_prevax_extf.py
+++ b/analysis/study_definition_prevax_extf.py
@@ -27,7 +27,7 @@
 from common_variables import generate_common_variables
 (
     dynamic_variables
-) = generate_common_variables(index_date_variable="index_date_prevax", end_date_variable="end_date_prevax")
+) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome")
 
 ## Variables for deriving JCVI groups
 from grouping_variables import (
@@ -41,18 +41,24 @@
 study = StudyDefinition(
 
     # Specify study dates
-        index_date_prevax = patients.with_value_from_file(
+        index_date_cohort = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'index_prevax', 
             returning_type = 'date', 
             date_format = 'YYYY-MM-DD',
         ),
-        end_date_prevax = patients.with_value_from_file(
+        end_date_exposure = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'end_prevax', 
             returning_type = 'date', 
             date_format = 'YYYY-MM-DD',     
         ),
+        end_date_outcome = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz', 
+            returning = 'end_prevax_extf', 
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',     
+        ),
 
     # Configure the expectations framework
         default_expectations={
diff --git a/analysis/study_definition_unvax.py b/analysis/study_definition_unvax.py
index 1fb9f351..120edace 100644
--- a/analysis/study_definition_unvax.py
+++ b/analysis/study_definition_unvax.py
@@ -27,7 +27,7 @@
 from common_variables import generate_common_variables
 (
     dynamic_variables
-) = generate_common_variables(index_date_variable="index_date_unvax", end_date_variable="end_date_unvax")
+) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome")
 
 ## Variables for deriving JCVI groups
 from grouping_variables import (
@@ -40,13 +40,19 @@
 study = StudyDefinition(
 
     # Specify study dates
-        index_date_unvax = patients.with_value_from_file(
+        index_date_cohort = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'index_unvax', 
             returning_type = 'date', 
             date_format = 'YYYY-MM-DD',     
         ),
-        end_date_unvax = patients.with_value_from_file(
+        end_date_exposure = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'end_unvax',
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',
+        ),
+        end_date_outcome = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz',
             returning = 'end_unvax',
             returning_type = 'date', 
diff --git a/analysis/study_definition_unvax_extf.py b/analysis/study_definition_unvax_extf.py
new file mode 100644
index 00000000..8e43a61f
--- /dev/null
+++ b/analysis/study_definition_unvax_extf.py
@@ -0,0 +1,92 @@
+# Import statements
+
+## Set seed
+import numpy as np
+np.random.seed(123456)
+
+## Cohort extractor
+from cohortextractor import (
+  StudyDefinition,
+  patients,
+  codelist_from_csv,
+  codelist,
+  filter_codes_by_category,
+  combine_codelists,
+)
+
+## Codelists from codelists.py (which loads them from the codelists folder)
+from codelists import *
+
+## Datetime functions
+from datetime import date
+
+## Study definition helper
+import study_definition_helper_functions as helpers
+
+## Import common variables function
+from common_variables import generate_common_variables
+(
+    dynamic_variables
+) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome")
+
+## Variables for deriving JCVI groups
+from grouping_variables import (
+    jcvi_variables, 
+    start_date,
+    end_date,
+    study_dates
+)
+
+study = StudyDefinition(
+
+    # Study dates, read from output/index_dates.csv.gz: index_date_cohort <- index_unvax, end_date_exposure <- end_unvax, end_date_outcome <- end_unvax_extf
+        index_date_cohort = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz', 
+            returning = 'index_unvax', 
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',     
+        ),
+        end_date_exposure = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'end_unvax',
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',
+        ),
+        end_date_outcome = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'end_unvax_extf',
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',
+        ),
+  
+    # Configure the expectations framework
+        default_expectations={
+            "date": {"earliest": study_dates["earliest_expec"], "latest": "today"},
+            "rate": "uniform",
+            "incidence": 0.5,
+        },
+
+    # Define the study population (NB: all inclusions and exclusions are performed in stage 1)
+        population = patients.all(),
+
+    # Define sex, read from output/index_dates.csv.gz (NB: this is required for JCVI variables hence is defined here)
+        cov_cat_sex = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'cov_cat_sex',
+            returning_type = 'str',  
+        ),
+
+    # vax_date_covid_1 as stored in output/index_dates.csv.gz (original note: any covid vaccination, identified by target disease)
+        vax_date_covid_1 = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'vax_date_covid_1',
+            returning_type = 'date'          
+        ),
+
+    # Define vaccine eligibility variables
+        **jcvi_variables, 
+
+    # Define common variables (e.g., exposures, outcomes, covariates) that require dynamic dates
+        **dynamic_variables
+
+)
\ No newline at end of file
diff --git a/analysis/study_definition_vax.py b/analysis/study_definition_vax.py
index 4f346c86..e786d138 100644
--- a/analysis/study_definition_vax.py
+++ b/analysis/study_definition_vax.py
@@ -36,19 +36,25 @@
 from common_variables import generate_common_variables
 (
     dynamic_variables
-) = generate_common_variables(index_date_variable="index_date_vax", end_date_variable="end_date_vax")
+) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome")
 
 
 study = StudyDefinition(
 
     # Specify study dates
-        index_date_vax = patients.with_value_from_file(
+        index_date_cohort = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz', 
             returning = 'index_vax', 
             returning_type = 'date', 
             date_format = 'YYYY-MM-DD',     
         ),
-        end_date_vax = patients.with_value_from_file(
+        end_date_exposure = patients.with_value_from_file(
+            f_path = 'output/index_dates.csv.gz',
+            returning = 'end_vax',
+            returning_type = 'date', 
+            date_format = 'YYYY-MM-DD',
+        ),
+        end_date_outcome = patients.with_value_from_file(
             f_path = 'output/index_dates.csv.gz',
             returning = 'end_vax',
             returning_type = 'date', 
diff --git a/analysis/table2.R b/analysis/table2.R
new file mode 100644
index 00000000..66e23817
--- /dev/null
+++ b/analysis/table2.R
@@ -0,0 +1,21 @@
+library(readr)
+library(dplyr)
+library(magrittr)
+
+# Load active analyses ---------------------------------------------------------
+
+active_analyses <- readr::read_rds("lib/active_analyses.rds")
+
+# Repeat for each active analysis (seq_len() yields an empty loop when there are zero rows)
+
+for (i in seq_len(nrow(active_analyses))) {
+  
+  ## Load data -----------------------------------------------------------------
+  ## NOTE(review): path has no directory prefix - confirm where model_input files are written
+  df <- read_rds(paste0("model_input-",active_analyses$name[i],".rds"))
+  
+  ## Calculate number of events ------------------------------------------------
+  
+  ## TODO: not implemented yet - df is loaded but nothing is computed from it
+  
+}
diff --git a/codelists/codelists.json b/codelists/codelists.json
index 22cf24da..1c3e9b34 100644
--- a/codelists/codelists.json
+++ b/codelists/codelists.json
@@ -779,6 +779,18 @@
       "url": "https://codelists.opensafely.org/codelist/bristol/anxiolytics_040102/38c710f3/",
       "downloaded_at": "2022-08-09 16:25:34.347033Z",
       "sha": "8a5cbbc840b72cd7391affd0e39ada5fe6b6029d"
+    },
+    "user-elsie_horne-cocp_dmd.csv": {
+      "id": "user/elsie_horne/cocp_dmd/1666a7a3",
+      "url": "https://codelists.opensafely.org/codelist/user/elsie_horne/cocp_dmd/1666a7a3/",
+      "downloaded_at": "2023-02-07 15:43:17.491774Z",
+      "sha": "8327c8ec4d2a3f1fcec7ffaa76a3e79cc1e5974a"
+    },
+    "user-elsie_horne-hrt_dmd.csv": {
+      "id": "user/elsie_horne/hrt_dmd/19196799",
+      "url": "https://codelists.opensafely.org/codelist/user/elsie_horne/hrt_dmd/19196799/",
+      "downloaded_at": "2023-02-07 15:43:17.735201Z",
+      "sha": "14aff592668a703f02cc68aa54c7ebab0774c62f"
     }
   }
 }
\ No newline at end of file
diff --git a/codelists/codelists.txt b/codelists/codelists.txt
index 872c8ae8..aa11a38d 100644
--- a/codelists/codelists.txt
+++ b/codelists/codelists.txt
@@ -127,4 +127,6 @@ user/RochelleKnight/confirmed-hospitalised-covid-19/1f0d2526
 bristol/antidepressant-drugs/1d04ddc8
 bristol/antipsychotic-drugs/51d68428
 bristol/opioid-dependence/6ae5f75d
-bristol/anxiolytics_040102/38c710f3
\ No newline at end of file
+bristol/anxiolytics_040102/38c710f3
+user/elsie_horne/cocp_dmd/1666a7a3
+user/elsie_horne/hrt_dmd/19196799
\ No newline at end of file
diff --git a/codelists/user-elsie_horne-cocp_dmd.csv b/codelists/user-elsie_horne-cocp_dmd.csv
new file mode 100644
index 00000000..b3339d78
--- /dev/null
+++ b/codelists/user-elsie_horne-cocp_dmd.csv
@@ -0,0 +1,92 @@
+dmd_id
+326350003
+326309006
+326361006
+4639611000001105
+21711311000001108
+3058111000001101
+208311000001105
+3049211000001104
+4608311000001102
+11758611000001104
+17346911000001108
+17353311000001100
+22263611000001104
+21933611000001104
+24676211000001100
+24678111000001104
+28006411000001100
+29910811000001108
+30195711000001108
+30252711000001104
+30805711000001104
+38335711000001104
+326310001
+326324002
+326358005
+326351004
+36062011000001104
+3546811000001107
+377360003
+524211000001108
+42111000001107
+3052511000001108
+492611000001103
+3058411000001106
+3048811000001105
+3174811000001109
+439011000001108
+11753211000001108
+16614111000001104
+17220611000001108
+17346711000001106
+17348811000001102
+17351511000001108
+18358111000001100
+21730911000001104
+21930311000001104
+23649211000001108
+24564811000001104
+24676611000001104
+24677511000001100
+24677911000001108
+24684111000001108
+24684511000001104
+27979911000001108
+29911411000001104
+30805911000001100
+33017111000001104
+34104511000001104
+34181511000001104
+38340211000001104
+326341000
+3227811000001102
+4431511000001108
+377414004
+3228611000001102
+4431211000001105
+326364003
+235311000001105
+312411000001108
+403611000001106
+380211000001105
+22562211000001104
+31364011000001104
+326369008
+3831411000001104
+3545111000001106
+3545011000001105
+3213311000001106
+3215011000001109
+3236411000001104
+4432011000001108
+3233311000001102
+3047311000001102
+17444111000001106
+15473911000001108
+15470011000001100
+22403311000001100
+22311511000001104
+15364711000001108
+15364511000001104
+36602211000001104
diff --git a/codelists/user-elsie_horne-hrt_dmd.csv b/codelists/user-elsie_horne-hrt_dmd.csv
new file mode 100644
index 00000000..e32cc2b8
--- /dev/null
+++ b/codelists/user-elsie_horne-hrt_dmd.csv
@@ -0,0 +1,186 @@
+dmd_id
+32936711000001100
+32927411000001104
+325480003
+734211000001107
+526411000001104
+692011000001108
+325481004
+4111411000001103
+4111811000001101
+4111611000001100
+325482006
+3962511000001105
+3963211000001101
+3962811000001108
+11738011000001104
+14778411000001108
+11733811000001108
+8794111000001104
+8752311000001103
+325541008
+3773511000001101
+3774411000001102
+3773711000001106
+27322111000001108
+325546003
+3348711000001103
+3365411000001107
+3454911000001101
+3447411000001102
+10276811000001100
+325577001
+36064911000001104
+325545004
+3346811000001105
+3367011000001105
+3448611000001100
+36065111000001112
+36065511000001104
+36065011000001104
+3664211000001102
+3664111000001108
+36064811000001104
+36065411000001104
+38344311000001104
+2942911000001101
+2889311000001109
+2889611000001104
+2945011000001105
+2937111000001101
+2911111000001100
+2838411000001101
+2842511000001103
+2942311000001102
+2889111000001107
+2903111000001101
+2936011000001100
+3414911000001105
+2948411000001106
+2902911000001105
+2938411000001105
+2949711000001109
+2939611000001104
+2841811000001109
+2837211000001104
+2890211000001107
+2893411000001103
+3657811000001104
+3658611000001104
+9045511000001100
+9045711000001106
+9044911000001108
+9045111000001104
+9045311000001108
+10276611000001104
+38268911000001104
+293111000001101
+3449411000001106
+325505008
+15466311000001108
+3351911000001108
+3355711000001108
+325533008
+3196511000001106
+24659611000001108
+4522411000001109
+3350611000001107
+3359911000001107
+3354111000001108
+2846111000001106
+2845411000001105
+2845811000001107
+2845111000001100
+3404911000001108
+325648008
+3455211000001106
+3448111000001108
+3557411000001105
+3542811000001106
+3542711000001103
+3465411000001100
+3465311000001107
+36065211000001104
+3542611000001107
+325662001
+3369311000001103
+3788211000001104
+3864111000001104
+3788311000001107
+4508511000001104
+4725811000001103
+4339811000001101
+8801211000001108
+21366211000001108
+3351311000001107
+3199011000001109
+3049911000001108
+3358411000001107
+3363611000001106
+3216611000001107
+3217311000001104
+3043111000001107
+3355511000001103
+3349811000001104
+3346111000001103
+3347911000001102
+519311000001107
+21259311000001108
+3040311000001103
+3455511000001109
+3367311000001108
+3049511000001101
+3038811000001109
+3780211000001102
+3853711000001105
+3779911000001106
+4499111000001107
+4711811000001109
+4338711000001100
+8787011000001107
+10280511000001108
+10277411000001100
+325556004
+3456411000001101
+325557008
+7142211000001109
+409118006
+34911000001102
+546511000001102
+11476811000001108
+400674006
+3341011000001106
+409322009
+15621411000001104
+3470811000001103
+557911000001109
+3456711000001107
+679511000001100
+7340311000001105
+326075007
+22567411000001108
+24559611000001104
+21960811000001104
+38744511000001104
+34444711000001100
+22517711000001104
+30086311000001108
+28996211000001108
+30863211000001104
+33612911000001100
+24110811000001108
+33971011000001100
+22109311000001100
+239811000001103
+22358811000001100
+24676811000001104
+32460211000001100
+325568008
+24418911000001104
+38039411000001104
+37088911000001104
+24195511000001100
+37241411000001104
+37825511000001104
+37363111000001104
+521411000001105
diff --git a/project.yaml b/project.yaml
index b55de25e..ab5c69df 100644
--- a/project.yaml
+++ b/project.yaml
@@ -91,6 +91,18 @@ actions:
       highly_sensitive:
         cohort: output/input_unvax.csv.gz
 
+  ## Extract the unvax_extf cohort: run study_definition_unvax_extf and write output/input_unvax_extf.csv.gz
+
+  generate_study_population_unvax_extf:
+    run: cohortextractor:latest generate_cohort --study-definition study_definition_unvax_extf
+      --output-format csv.gz
+    needs:
+    - vax_eligibility_inputs
+    - generate_index_dates
+    outputs:
+      highly_sensitive:
+        cohort: output/input_unvax_extf.csv.gz
+
   ## Preprocess data - prevax 
 
   preprocess_data_prevax:
@@ -151,6 +163,21 @@ actions:
         cohort: output/input_unvax.rds
         venn: output/venn_unvax.rds
 
+  ## Preprocess data - unvax_extf (analysis/preprocess_data.R with cohort argument unvax_extf)
+
+  preprocess_data_unvax_extf:
+    run: r:latest analysis/preprocess_data.R unvax_extf
+    needs:
+    - generate_index_dates
+    - generate_study_population_unvax_extf
+    outputs:
+      moderately_sensitive:
+        describe: output/describe_input_unvax_extf_stage0.txt
+        describe_venn: output/describe_venn_unvax_extf.txt
+      highly_sensitive:
+        cohort: output/input_unvax_extf.rds
+        venn: output/venn_unvax_extf.rds
+
   ## Data cleaning - all cohorts 
 
   stage1_data_cleaning_all:
@@ -160,7 +187,7 @@ actions:
     - preprocess_data_prevax_extf
     - preprocess_data_vax
     - preprocess_data_unvax
-    - vax_eligibility_inputs
+    - preprocess_data_unvax_extf
     outputs:
       moderately_sensitive:
         refactoring: output/meta_data_factors_*.csv
@@ -170,38 +197,6 @@ actions:
       highly_sensitive:
         cohort: output/input_*.rds
 
-  describe_file-input_prevax_stage1:
-    run: r:latest analysis/describe_file.R input_prevax_stage1 rds
-    needs:
-    - stage1_data_cleaning_all
-    outputs:
-      moderately_sensitive:
-        describe_model_input: output/describe-input_prevax_stage1.txt
-
-  describe_file-input_prevax_extf_stage1:
-    run: r:latest analysis/describe_file.R input_prevax_extf_stage1 rds
-    needs:
-    - stage1_data_cleaning_all
-    outputs:
-      moderately_sensitive:
-        describe_model_input: output/describe-input_prevax_extf_stage1.txt
-
-  describe_file-input_vax_stage1:
-    run: r:latest analysis/describe_file.R input_vax_stage1 rds
-    needs:
-    - stage1_data_cleaning_all
-    outputs:
-      moderately_sensitive:
-        describe_model_input: output/describe-input_vax_stage1.txt
-
-  describe_file-input_unvax_stage1:
-    run: r:latest analysis/describe_file.R input_unvax_stage1 rds
-    needs:
-    - stage1_data_cleaning_all
-    outputs:
-      moderately_sensitive:
-        describe_model_input: output/describe-input_unvax_stage1.txt
-
   ## Stage 5 - Run models 
 
   make_model_input-cohort_prevax-main-addiction: