diff --git a/analysis/codelists.py b/analysis/codelists.py index cf7c97df..bceeb96b 100644 --- a/analysis/codelists.py +++ b/analysis/codelists.py @@ -746,4 +746,18 @@ anxiety_icd10, ocd_icd10, ptsd_icd10 +) + +# COCP +cocp_dmd = codelist_from_csv( + "codelists/user-elsie_horne-cocp_dmd.csv", + system="snomed", + column="dmd_id", +) + +# HRT +hrt_dmd = codelist_from_csv( + "codelists/user-elsie_horne-hrt_dmd.csv", + system="snomed", + column="dmd_id", ) \ No newline at end of file diff --git a/analysis/common_variables.py b/analysis/common_variables.py index d31c8740..df2e1377 100644 --- a/analysis/common_variables.py +++ b/analysis/common_variables.py @@ -28,7 +28,7 @@ pandemic_start = study_dates["pandemic_start"] # Define common variables function -def generate_common_variables(index_date_variable,end_date_variable): +def generate_common_variables(index_date_variable,exposure_end_date_variable,outcome_end_date_variable): dynamic_variables = dict( @@ -43,7 +43,7 @@ def generate_common_variables(index_date_variable,end_date_variable): returning="date", find_first_match_in_period=True, date_format="YYYY-MM-DD", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{exposure_end_date_variable}"], return_expectations={ "date": {"earliest": study_dates["pandemic_start"], "latest" : "today"}, "rate": "uniform", @@ -59,7 +59,7 @@ def generate_common_variables(index_date_variable,end_date_variable): covid_primary_care_sequalae, ), returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{exposure_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -73,7 +73,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_exp_date_covid19_confirmed_hes=patients.admitted_to_hospital( with_these_diagnoses=covid_codes, returning="date_admitted", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{exposure_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -87,7 +87,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_exp_date_covid19_confirmed_death=patients.with_these_codes_on_death_certificate( covid_codes, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{exposure_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -125,7 +125,7 @@ def generate_common_variables(index_date_variable,end_date_variable): ## Deregistraton date dereg_date=patients.date_deregistered_from_all_supported_practices( - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format = 'YYYY-MM-DD', return_expectations={ "date": {"earliest": study_dates["pandemic_start"], "latest": "today"}, @@ -194,7 +194,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_depression_snomed=patients.with_these_clinical_events( depression_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -208,7 +208,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_depression_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=depression_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -222,7 +222,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_depression_death=patients.with_these_codes_on_death_certificate( depression_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -243,7 +243,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_general_snomed=patients.with_these_clinical_events( anxiety_general_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -257,7 +257,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_general_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=anxiety_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -271,7 +271,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_general_death=patients.with_these_codes_on_death_certificate( anxiety_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -292,7 +292,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_ptsd_snomed=patients.with_these_clinical_events( anxiety_ptsd_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -306,7 +306,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_ptsd_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=ptsd_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -320,7 +320,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_anxiety_ptsd_death=patients.with_these_codes_on_death_certificate( ptsd_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -341,7 +341,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_eating_disorders_snomed=patients.with_these_clinical_events( eating_disorders_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -355,7 +355,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_eating_disorders_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=eating_disorder_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -369,7 +369,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_eating_disorders_death=patients.with_these_codes_on_death_certificate( eating_disorder_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -390,7 +390,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_serious_mental_illness_snomed=patients.with_these_clinical_events( serious_mental_illness_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -404,7 +404,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_serious_mental_illness_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=serious_mental_illness_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -418,7 +418,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_serious_mental_illness_death=patients.with_these_codes_on_death_certificate( serious_mental_illness_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -439,7 +439,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_self_harm_snomed=patients.with_these_clinical_events( self_harm_15_10_combined_snomed, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -453,7 +453,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_self_harm_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=self_harm_15_10_combined_icd, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -467,7 +467,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_self_harm_death=patients.with_these_codes_on_death_certificate( self_harm_15_10_combined_icd, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -488,7 +488,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_suicide_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=suicide_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -502,7 +502,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_suicide_death=patients.with_these_codes_on_death_certificate( suicide_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -523,7 +523,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_addiction_snomed=patients.with_these_clinical_events( addiction_snomed_clinical, returning="date", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -537,7 +537,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_addiction_hes=patients.admitted_to_hospital( returning="date_admitted", with_these_diagnoses=opioid_misuse_icd10, - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], date_format="YYYY-MM-DD", find_first_match_in_period=True, return_expectations={ @@ -551,7 +551,7 @@ def generate_common_variables(index_date_variable,end_date_variable): tmp_out_date_addiction_death=patients.with_these_codes_on_death_certificate( opioid_misuse_icd10, returning="date_of_death", - between=[f"{index_date_variable}",f"{end_date_variable}"], + between=[f"{index_date_variable}",f"{outcome_end_date_variable}"], match_only_underlying_cause=True, date_format="YYYY-MM-DD", return_expectations={ @@ -1307,6 +1307,26 @@ def generate_common_variables(index_date_variable,end_date_variable): }, ), + ## HRT or COCP + + tmp_cocp=patients.with_these_medications( + cocp_dmd, + returning='binary_flag', + on_or_before=f"{index_date_variable}", + return_expectations={"incidence": 0.1}, + ), + + tmp_hrt=patients.with_these_medications( + hrt_dmd, + returning='binary_flag', + on_or_before=f"{index_date_variable}", + return_expectations={"incidence": 0.1}, + ), + + qa_bin_hrtcocp=patients.maximum_of( + "tmp_cocp", "tmp_hrt" + ), + ) return dynamic_variables diff --git a/analysis/create_project_actions.R b/analysis/create_project_actions.R index 4a11c6b5..fadce510 100644 --- a/analysis/create_project_actions.R +++ b/analysis/create_project_actions.R @@ -237,6 +237,17 @@ actions_list <- splice( ) ), + comment("Implement study_definition for unvax_extf"), + + action( + name = "generate_study_population_unvax_extf", + run = "cohortextractor:latest generate_cohort --study-definition study_definition_unvax_extf --output-format csv.gz", + needs = list("vax_eligibility_inputs","generate_index_dates"), + highly_sensitive = list( + cohort = glue("output/input_unvax_extf.csv.gz") + ) + ), + comment("Preprocess data - prevax"), action( @@ -301,12 +312,28 @@ actions_list <- splice( ) ), + comment("Preprocess data - unvax_extf"), + + action( + name = "preprocess_data_unvax_extf", + run = "r:latest analysis/preprocess_data.R unvax_extf", + needs = list("generate_index_dates", "generate_study_population_unvax_extf"), + moderately_sensitive = list( + describe = glue("output/describe_input_unvax_extf_stage0.txt"), + describe_venn = glue("output/describe_venn_unvax_extf.txt") + ), + highly_sensitive = list( + cohort = glue("output/input_unvax_extf.rds"), + venn = glue("output/venn_unvax_extf.rds") + ) + ), + comment("Data cleaning - all cohorts"), action( name = "stage1_data_cleaning_all", run = "r:latest analysis/stage1_data_cleaning.R all", - needs = list("preprocess_data_prevax","preprocess_data_prevax_extf","preprocess_data_vax", "preprocess_data_unvax","vax_eligibility_inputs"), + needs = list("preprocess_data_prevax","preprocess_data_prevax_extf","preprocess_data_vax", "preprocess_data_unvax", "preprocess_data_unvax_extf"), moderately_sensitive = list( refactoring = glue("output/meta_data_factors_*.csv"), QA_rules = glue("output/QA_summary_*.csv"), @@ -318,41 +345,41 @@ actions_list <- splice( ) ), - action( - name = glue("describe_file-input_prevax_stage1"), - run = glue("r:latest analysis/describe_file.R input_prevax_stage1 rds"), - needs = list("stage1_data_cleaning_all"), - moderately_sensitive = list( - describe_model_input = glue("output/describe-input_prevax_stage1.txt") - ) - ), - - action( - name = glue("describe_file-input_prevax_extf_stage1"), - run = glue("r:latest analysis/describe_file.R input_prevax_extf_stage1 rds"), - needs = list("stage1_data_cleaning_all"), - moderately_sensitive = list( - describe_model_input = glue("output/describe-input_prevax_extf_stage1.txt") - ) - ), - - action( - name = glue("describe_file-input_vax_stage1"), - run = glue("r:latest analysis/describe_file.R input_vax_stage1 rds"), - needs = list("stage1_data_cleaning_all"), - moderately_sensitive = list( - describe_model_input = glue("output/describe-input_vax_stage1.txt") - ) - ), - - action( - name = glue("describe_file-input_unvax_stage1"), - run = glue("r:latest analysis/describe_file.R input_unvax_stage1 rds"), - needs = list("stage1_data_cleaning_all"), - moderately_sensitive = list( - describe_model_input = glue("output/describe-input_unvax_stage1.txt") - ) - ), + # action( + # name = glue("describe_file-input_prevax_stage1"), + # run = glue("r:latest analysis/describe_file.R input_prevax_stage1 rds"), + # needs = list("stage1_data_cleaning_all"), + # moderately_sensitive = list( + # describe_model_input = glue("output/describe-input_prevax_stage1.txt") + # ) + # ), + # + # action( + # name = glue("describe_file-input_prevax_extf_stage1"), + # run = glue("r:latest analysis/describe_file.R input_prevax_extf_stage1 rds"), + # needs = list("stage1_data_cleaning_all"), + # moderately_sensitive = list( + # describe_model_input = glue("output/describe-input_prevax_extf_stage1.txt") + # ) + # ), + # + # action( + # name = glue("describe_file-input_vax_stage1"), + # run = glue("r:latest analysis/describe_file.R input_vax_stage1 rds"), + # needs = list("stage1_data_cleaning_all"), + # moderately_sensitive = list( + # describe_model_input = glue("output/describe-input_vax_stage1.txt") + # ) + # ), + # + # action( + # name = glue("describe_file-input_unvax_stage1"), + # run = glue("r:latest analysis/describe_file.R input_unvax_stage1 rds"), + # needs = list("stage1_data_cleaning_all"), + # moderately_sensitive = list( + # describe_model_input = glue("output/describe-input_unvax_stage1.txt") + # ) + # ), # comment("Stage 2 - Missing - Table 1 - all cohorts"), # diff --git a/analysis/make_model_input.R b/analysis/make_model_input.R index c36f8c3e..6ef34e56 100644 --- a/analysis/make_model_input.R +++ b/analysis/make_model_input.R @@ -55,7 +55,8 @@ for (i in 1:nrow(active_analyses)) { input <- input[,unique(c("patient_id", "index_date", - "end_date", + "end_date_exposure", + "end_date_outcome", active_analyses$exposure[i], active_analyses$outcome[i], unlist(strsplit(active_analyses$strata[i], split = ";")), @@ -75,8 +76,8 @@ for (i in 1:nrow(active_analyses)) { "exp_date" = active_analyses$exposure[i]) input <- input %>% - dplyr::mutate(out_date = replace(out_date, which(out_date>end_date | out_dateend_date | exp_dateend_date_outcome | out_dateend_date_exposure | exp_date% dplyr::rowwise() %>% - dplyr::mutate(end_date = min(end_date, out_date, na.rm = TRUE)) - - # # Make three level history covariates ---------------------------------------- - # print('Make three level history covariates') - # - # input$cov_cat_priorhistory_depression <- dplyr::case_when( - # input$cov_bin_history_depression==TRUE & input$cov_bin_recent_depression==TRUE ~ "recent", - # input$cov_bin_history_depression==TRUE & input$cov_bin_recent_depression==FALSE ~ "notrecent", - # input$cov_bin_history_depression==FALSE & input$cov_bin_recent_depression==TRUE ~ "recent", - # input$cov_bin_history_depression==FALSE & input$cov_bin_recent_depression==FALSE ~ "none") - # input[,c("cov_bin_history_depression","cov_bin_recent_depression")] <- NULL - # input$cov_cat_priorhistory_depression <- as.factor(input$cov_cat_priorhistory_depression) - # - # input$cov_cat_priorhistory_anxiety_general <- dplyr::case_when( - # input$cov_bin_history_anxiety==TRUE & input$cov_bin_recent_anxiety==TRUE ~ "recent", - # input$cov_bin_history_anxiety==TRUE & input$cov_bin_recent_anxiety==FALSE ~ "notrecent", - # input$cov_bin_history_anxiety==FALSE & input$cov_bin_recent_anxiety==TRUE ~ "recent", - # input$cov_bin_history_anxiety==FALSE & input$cov_bin_recent_anxiety==FALSE ~ "none") - # input[,c("cov_bin_history_anxiety","cov_bin_recent_anxiety")] <- NULL - # input$cov_cat_priorhistory_anxiety_general <- as.factor(input$cov_cat_priorhistory_anxiety_general) - # - # input$cov_cat_priorhistory_eating_disorders <- dplyr::case_when( - # input$cov_bin_history_eating_disorders==TRUE & input$cov_bin_recent_eating_disorders==TRUE ~ "recent", - # input$cov_bin_history_eating_disorders==TRUE & input$cov_bin_recent_eating_disorders==FALSE ~ "notrecent", - # input$cov_bin_history_eating_disorders==FALSE & input$cov_bin_recent_eating_disorders==TRUE ~ "recent", - # input$cov_bin_history_eating_disorders==FALSE & input$cov_bin_recent_eating_disorders==FALSE ~ "none") - # input[,c("cov_bin_history_eating_disorders","cov_bin_recent_eating_disorders")] <- NULL - # input$cov_cat_priorhistory_eating_disorders <- as.factor(input$cov_cat_priorhistory_eating_disorders) - # - # input$cov_cat_priorhistory_serious_mental_illness <- dplyr::case_when( - # input$cov_bin_history_serious_mental_illness==TRUE & input$cov_bin_recent_serious_mental_illness==TRUE ~ "recent", - # input$cov_bin_history_serious_mental_illness==TRUE & input$cov_bin_recent_serious_mental_illness==FALSE ~ "notrecent", - # input$cov_bin_history_serious_mental_illness==FALSE & input$cov_bin_recent_serious_mental_illness==TRUE ~ "recent", - # input$cov_bin_history_serious_mental_illness==FALSE & input$cov_bin_recent_serious_mental_illness==FALSE ~ "none") - # input[,c("cov_bin_history_serious_mental_illness","cov_bin_recent_serious_mental_illness")] <- NULL - # input$cov_cat_priorhistory_serious_mental_illness <- as.factor(input$cov_cat_priorhistory_serious_mental_illness) - # - # input$cov_cat_priorhistory_self_harm <- dplyr::case_when( - # input$cov_bin_history_self_harm==TRUE & input$cov_bin_recent_self_harm==TRUE ~ "recent", - # input$cov_bin_history_self_harm==TRUE & input$cov_bin_recent_self_harm==FALSE ~ "notrecent", - # input$cov_bin_history_self_harm==FALSE & input$cov_bin_recent_self_harm==TRUE ~ "recent", - # input$cov_bin_history_self_harm==FALSE & input$cov_bin_recent_self_harm==FALSE ~ "none") - # input[,c("cov_bin_history_self_harm","cov_bin_recent_self_harm")] <- NULL - # input$cov_cat_priorhistory_self_harm <- as.factor(input$cov_cat_priorhistory_self_harm) - + dplyr::mutate(end_date_outcome = min(end_date_outcome, out_date, na.rm = TRUE)) + # Make model input: main ------------------------------------------------------- if (active_analyses$analysis[i]=="main") { @@ -155,11 +113,11 @@ for (i in 1:nrow(active_analyses)) { df <- input[input$sub_bin_covid19_confirmed_history==FALSE,] df <- df %>% - dplyr::mutate(end_date = replace(end_date, which(sub_cat_covid19_hospital=="non_hospitalised"), exp_date-1), + dplyr::mutate(end_date_outcome = replace(end_date_outcome, which(sub_cat_covid19_hospital=="non_hospitalised"), exp_date-1), exp_date = replace(exp_date, which(sub_cat_covid19_hospital=="non_hospitalised"), NA), - out_date = replace(out_date, which(out_date>end_date), NA)) + out_date = replace(out_date, which(out_date>end_date_outcome), NA)) - df <- df[df$end_date>=df$index_date,] + df <- df[df$end_date_outcome>=df$index_date,] df[,colnames(df)[grepl("sub_",colnames(df))]] <- NULL @@ -179,11 +137,11 @@ for (i in 1:nrow(active_analyses)) { df <- input[input$sub_bin_covid19_confirmed_history==FALSE,] df <- df %>% - dplyr::mutate(end_date = replace(end_date, which(sub_cat_covid19_hospital=="hospitalised"), exp_date-1), + dplyr::mutate(end_date_outcome = replace(end_date_outcome, which(sub_cat_covid19_hospital=="hospitalised"), exp_date-1), exp_date = replace(exp_date, which(sub_cat_covid19_hospital=="hospitalised"), NA), - out_date = replace(out_date, which(out_date>end_date), NA)) + out_date = replace(out_date, which(out_date>end_date_outcome), NA)) - df <- df[df$end_date>=df$index_date,] + df <- df[df$end_date_outcome>=df$index_date,] df$index_date <- as.Date(df$index_date) df[,colnames(df)[grepl("sub_",colnames(df))]] <- NULL diff --git a/analysis/prelim.R b/analysis/prelim.R index 770887fb..8b0945c9 100644 --- a/analysis/prelim.R +++ b/analysis/prelim.R @@ -20,15 +20,17 @@ prelim_data <- prelim_data %>% unit = "days"))) %>% mutate(vax_date_covid_2_offset = vax_date_covid_2 + days(efficacy_offset), vax_date_eligible_offset = vax_date_eligible + days(eligibility_offset), - index_prevax = as.Date(study_dates$pandemic_start)) %>% + index_prevax = as.Date(study_dates$pandemic_start), + index_prevax_extf = as.Date(study_dates$pandemic_start)) %>% rowwise() %>% mutate(index_vax = max(c(vax_date_covid_2_offset, delta_date), na.rm=T), index_unvax = max(c(vax_date_eligible_offset, delta_date), na.rm=T), + index_unvax_extf = max(c(vax_date_eligible_offset, delta_date), na.rm=T), end_vax = min(c(death_date, delta_end_date), na.rm=T), - end_unvax = min(c(death_date, delta_end_date), na.rm=T), - end_prevax = min(c(vax_date_eligible,death_date, vax_date_covid_1, all_eligible_date), na.rm=T), - end_prevax_exf = min(c(death_date, vax_date_covid_1, na.rm=T))) - + end_unvax = min(c(death_date, delta_end_date, vax_date_covid_1), na.rm=T), + end_unvax_extf = min(c(death_date, delta_end_date), na.rm=T), + end_prevax = min(c(vax_date_eligible, death_date, vax_date_covid_1, all_eligible_date), na.rm=T), + end_prevax_extf = min(c(death_date, delta_end_date), na.rm=T)) #Write data to csv file write_csv(prelim_data, "output/index_dates.csv.gz") \ No newline at end of file diff --git a/analysis/preprocess_data.R b/analysis/preprocess_data.R index 8859ebbc..cbab3497 100644 --- a/analysis/preprocess_data.R +++ b/analysis/preprocess_data.R @@ -45,7 +45,7 @@ df <- df %>% if(Sys.getenv("OPENSAFELY_BACKEND") %in% c("", "expectations") && cohort_name %in% c("vax")) { - source("analysis/preprocess/modify_dummy_vax_data.R") + source("analysis/modify_dummy_vax_data.R") message("Vaccine information overwritten successfully") } diff --git a/analysis/stage1_data_cleaning.R b/analysis/stage1_data_cleaning.R index fcb094eb..98d2f058 100644 --- a/analysis/stage1_data_cleaning.R +++ b/analysis/stage1_data_cleaning.R @@ -15,26 +15,26 @@ library(arrow) args <- commandArgs(trailingOnly=TRUE) if(length(args)==0){ - cohort_name <- "prevax" + cohort_name <- "unvax" } else { cohort_name <- args[[1]] } -# Load json file containing vax study dates ------------------------------------ - -study_dates <- fromJSON("output/study_dates.json") - -# Specify relevant dates ------------------------------------------------------- - -vax_start_date <- as.Date(study_dates$vax1_earliest, format="%Y-%m-%d") -mixed_vax_threshold <- as.Date("2021-05-07") -start_date_delta <- as.Date(study_dates$delta_date, format="%Y-%m-%d") -end_date_delta <- as.Date(study_dates$omicron_date, format="%Y-%m-%d") - # Define stage 1 function ------------------------------------------------------ stage1 <- function(cohort_name) { + # Load json file containing vax study dates ------------------------------------ + + study_dates <- fromJSON("output/study_dates.json") + + # Specify relevant dates ----------------------------------------------------- + + vax_start_date <- as.Date(study_dates$vax1_earliest, format="%Y-%m-%d") + mixed_vax_threshold <- as.Date("2021-05-07") + start_date_delta <- as.Date(study_dates$delta_date, format="%Y-%m-%d") + end_date_delta <- as.Date(study_dates$omicron_date, format="%Y-%m-%d") + ## Load cohort data ---------------------------------------------------------- input <- read_rds(file.path("output", paste0("input_",cohort_name,".rds"))) @@ -42,9 +42,7 @@ stage1 <- function(cohort_name) { ## Rename date variables ----------------------------------------------------- - input <- input %>% - rename(index_date =!!sym(paste0("index_date_",cohort_name))) %>% - rename(end_date = !!sym(paste0("end_date_",cohort_name))) + input <- dplyr::rename(input, "index_date" = "index_date_cohort") ## Handle missing values ----------------------------------------------------- @@ -157,10 +155,8 @@ stage1 <- function(cohort_name) { ### Rule 5: HRT or COCP meds for men - input$rule5 <- FALSE - # input$rule5 <- NA - # input$rule5 <- ((input$cov_cat_sex=="Male" & input$qa_bin_hrt==TRUE) | - # (input$cov_cat_sex=="Male" & input$qa_bin_cocp==TRUE)) + input$rule5 <- NA + input$rule5 <- (input$cov_cat_sex=="Male" & input$qa_bin_hrtcocp==TRUE) ### Rule 6: Prostate cancer codes for women @@ -325,7 +321,7 @@ stage1 <- function(cohort_name) { input <- input %>% filter (!is.na(index_date) & index_date <= end_date & index_date >= start_date_delta) cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 13 (Inclusion): Patient index date is within the study start and end dates i.e patient is fully vaccinated before the study end date") - } else if (cohort_name == "unvax"){ + } else if (cohort_name %in% c("unvax","unvax_extf")){ ### Exclusion criteria 8: Have a record of one or more vaccination prior index date # i.e. Have a record of a first vaccination prior to index date @@ -344,7 +340,7 @@ stage1 <- function(cohort_name) { cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 9 (Exclusion): Missing or unknown JCVI group") ### Inclusion criteria 10: Index date is before cohort end date - will remove anyone whose eligibility date + 84 days is after study end date (only those with unknown JCVI group) - input <- input %>% filter (!is.na(index_date) & index_date <= end_date & index_date >= start_date_delta) + input <- input %>% filter (!is.na(index_date) & index_date <= end_date_exposure & index_date >= start_date_delta) cohort_flow[nrow(cohort_flow)+1,] <- c(nrow(input),as.numeric(cohort_flow[nrow(cohort_flow),"N"]) - nrow(input), "Criteria 10 (Inclusion): Patient index date is within the study start and end dates i.e patients eligibility date + 84 days is before the study end date") } @@ -399,6 +395,7 @@ if (cohort_name == "all") { stage1("prevax_extf") stage1("vax") stage1("unvax") + stage1("unvax_extf") } else{ stage1(cohort_name) } \ No newline at end of file diff --git a/analysis/study_definition_prevax.py b/analysis/study_definition_prevax.py index 623bc453..224c255e 100644 --- a/analysis/study_definition_prevax.py +++ b/analysis/study_definition_prevax.py @@ -27,7 +27,7 @@ from common_variables import generate_common_variables ( dynamic_variables -) = generate_common_variables(index_date_variable="index_date_prevax", end_date_variable="end_date_prevax") +) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome") ## Variables for deriving JCVI groups from grouping_variables import ( @@ -41,13 +41,19 @@ study = StudyDefinition( # Specify study dates - index_date_prevax = patients.with_value_from_file( + index_date_cohort = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'index_prevax', returning_type = 'date', date_format = 'YYYY-MM-DD', ), - end_date_prevax = patients.with_value_from_file( + end_date_exposure = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_prevax', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + end_date_outcome = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'end_prevax', returning_type = 'date', diff --git a/analysis/study_definition_prevax_extf.py b/analysis/study_definition_prevax_extf.py index 623bc453..4353b8ec 100644 --- a/analysis/study_definition_prevax_extf.py +++ b/analysis/study_definition_prevax_extf.py @@ -27,7 +27,7 @@ from common_variables import generate_common_variables ( dynamic_variables -) = generate_common_variables(index_date_variable="index_date_prevax", end_date_variable="end_date_prevax") +) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome") ## Variables for deriving JCVI groups from grouping_variables import ( @@ -41,18 +41,24 @@ study = StudyDefinition( # Specify study dates - index_date_prevax = patients.with_value_from_file( + index_date_cohort = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'index_prevax', returning_type = 'date', date_format = 'YYYY-MM-DD', ), - end_date_prevax = patients.with_value_from_file( + end_date_exposure = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'end_prevax', returning_type = 'date', date_format = 'YYYY-MM-DD', ), + end_date_outcome = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_prevax_extf', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), # Configure the expectations framework default_expectations={ diff --git a/analysis/study_definition_unvax.py b/analysis/study_definition_unvax.py index 1fb9f351..120edace 100644 --- a/analysis/study_definition_unvax.py +++ b/analysis/study_definition_unvax.py @@ -27,7 +27,7 @@ from common_variables import generate_common_variables ( dynamic_variables -) = generate_common_variables(index_date_variable="index_date_unvax", end_date_variable="end_date_unvax") +) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome") ## Variables for deriving JCVI groups from grouping_variables import ( @@ -40,13 +40,19 @@ study = StudyDefinition( # Specify study dates - index_date_unvax = patients.with_value_from_file( + index_date_cohort = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'index_unvax', returning_type = 'date', date_format = 'YYYY-MM-DD', ), - end_date_unvax = patients.with_value_from_file( + end_date_exposure = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_unvax', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + end_date_outcome = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'end_unvax', returning_type = 'date', diff --git a/analysis/study_definition_unvax_extf.py b/analysis/study_definition_unvax_extf.py new file mode 100644 index 00000000..8e43a61f --- /dev/null +++ b/analysis/study_definition_unvax_extf.py @@ -0,0 +1,92 @@ +# Import statements + +## Set seed +import numpy as np +np.random.seed(123456) + +## Cohort extractor +from cohortextractor import ( + StudyDefinition, + patients, + codelist_from_csv, + codelist, + filter_codes_by_category, + combine_codelists, +) + +## Codelists from codelist.py (which pulls them from the codelist folder) +from codelists import * + +## Datetime functions +from datetime import date + +## Study definition helper +import study_definition_helper_functions as helpers + +## Import common variables function +from common_variables import generate_common_variables +( + dynamic_variables +) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome") + +## Variables for deriving JCVI groups +from grouping_variables import ( + jcvi_variables, + start_date, + end_date, + study_dates +) + +study = StudyDefinition( + + # Specify study dates + index_date_cohort = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'index_unvax', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + end_date_exposure = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_unvax', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + end_date_outcome = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_unvax_extf', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + + # Configure the expectations framework + default_expectations={ + "date": {"earliest": study_dates["earliest_expec"], "latest": "today"}, + "rate": "uniform", + "incidence": 0.5, + }, + + # Define the study population (NB: all inclusions and exclusions are performed in stage 1) + population = patients.all(), + + # Define sex (NB: this is required for JCVI variables hence is defined here) + cov_cat_sex = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'cov_cat_sex', + returning_type = 'str', + ), + + # Any covid vaccination, identified by target disease + vax_date_covid_1 = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'vax_date_covid_1', + returning_type = 'date' + ), + + # Define vaccine eligibility variables + **jcvi_variables, + + # Define common variables (e.g., exposures, outcomes, covariates) that require dynamic dates + **dynamic_variables + +) \ No newline at end of file diff --git a/analysis/study_definition_vax.py b/analysis/study_definition_vax.py index 4f346c86..e786d138 100644 --- a/analysis/study_definition_vax.py +++ b/analysis/study_definition_vax.py @@ -36,19 +36,25 @@ from common_variables import generate_common_variables ( dynamic_variables -) = generate_common_variables(index_date_variable="index_date_vax", end_date_variable="end_date_vax") +) = generate_common_variables(index_date_variable="index_date_cohort", exposure_end_date_variable="end_date_exposure", outcome_end_date_variable="end_date_outcome") study = StudyDefinition( # Specify study dates - index_date_vax = patients.with_value_from_file( + index_date_cohort = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'index_vax', returning_type = 'date', date_format = 'YYYY-MM-DD', ), - end_date_vax = patients.with_value_from_file( + end_date_exposure = patients.with_value_from_file( + f_path = 'output/index_dates.csv.gz', + returning = 'end_vax', + returning_type = 'date', + date_format = 'YYYY-MM-DD', + ), + end_date_outcome = patients.with_value_from_file( f_path = 'output/index_dates.csv.gz', returning = 'end_vax', returning_type = 'date', diff --git a/analysis/table2.R b/analysis/table2.R new file mode 100644 index 00000000..66e23817 --- /dev/null +++ b/analysis/table2.R @@ -0,0 +1,21 @@ +library(readr) +library(dplyr) +library(magrittr) + +# Load active analyses --------------------------------------------------------- + +active_analyses <- readr::read_rds("lib/active_analyses.rds") + +# Repeat + +for (i in 1:nrow(active_analyses)) { + + ## Load data ----------------------------------------------------------------- + + df <- read_rds(paste0("model_input-",active_analyses$name[i],".rds")) + + ## Calculate number of events ------------------------------------------------ + + + +} diff --git a/codelists/codelists.json b/codelists/codelists.json index 22cf24da..1c3e9b34 100644 --- a/codelists/codelists.json +++ b/codelists/codelists.json @@ -779,6 +779,18 @@ "url": "https://codelists.opensafely.org/codelist/bristol/anxiolytics_040102/38c710f3/", "downloaded_at": "2022-08-09 16:25:34.347033Z", "sha": "8a5cbbc840b72cd7391affd0e39ada5fe6b6029d" + }, + "user-elsie_horne-cocp_dmd.csv": { + "id": "user/elsie_horne/cocp_dmd/1666a7a3", + "url": "https://codelists.opensafely.org/codelist/user/elsie_horne/cocp_dmd/1666a7a3/", + "downloaded_at": "2023-02-07 15:43:17.491774Z", + "sha": "8327c8ec4d2a3f1fcec7ffaa76a3e79cc1e5974a" + }, + "user-elsie_horne-hrt_dmd.csv": { + "id": "user/elsie_horne/hrt_dmd/19196799", + "url": "https://codelists.opensafely.org/codelist/user/elsie_horne/hrt_dmd/19196799/", + "downloaded_at": "2023-02-07 15:43:17.735201Z", + "sha": "14aff592668a703f02cc68aa54c7ebab0774c62f" } } } \ No newline at end of file diff --git a/codelists/codelists.txt b/codelists/codelists.txt index 872c8ae8..aa11a38d 100644 --- a/codelists/codelists.txt +++ b/codelists/codelists.txt @@ -127,4 +127,6 @@ user/RochelleKnight/confirmed-hospitalised-covid-19/1f0d2526 bristol/antidepressant-drugs/1d04ddc8 bristol/antipsychotic-drugs/51d68428 bristol/opioid-dependence/6ae5f75d -bristol/anxiolytics_040102/38c710f3 \ No newline at end of file +bristol/anxiolytics_040102/38c710f3 +user/elsie_horne/cocp_dmd/1666a7a3 +user/elsie_horne/hrt_dmd/19196799 \ No newline at end of file diff --git a/codelists/user-elsie_horne-cocp_dmd.csv b/codelists/user-elsie_horne-cocp_dmd.csv new file mode 100644 index 00000000..b3339d78 --- /dev/null +++ b/codelists/user-elsie_horne-cocp_dmd.csv @@ -0,0 +1,92 @@ +dmd_id +326350003 +326309006 +326361006 +4639611000001105 +21711311000001108 +3058111000001101 +208311000001105 +3049211000001104 +4608311000001102 +11758611000001104 +17346911000001108 +17353311000001100 +22263611000001104 +21933611000001104 +24676211000001100 +24678111000001104 +28006411000001100 +29910811000001108 +30195711000001108 +30252711000001104 +30805711000001104 +38335711000001104 +326310001 +326324002 +326358005 +326351004 +36062011000001104 +3546811000001107 +377360003 +524211000001108 +42111000001107 +3052511000001108 +492611000001103 +3058411000001106 +3048811000001105 +3174811000001109 +439011000001108 +11753211000001108 +16614111000001104 +17220611000001108 +17346711000001106 +17348811000001102 +17351511000001108 +18358111000001100 +21730911000001104 +21930311000001104 +23649211000001108 +24564811000001104 +24676611000001104 +24677511000001100 +24677911000001108 +24684111000001108 +24684511000001104 +27979911000001108 +29911411000001104 +30805911000001100 +33017111000001104 +34104511000001104 +34181511000001104 +38340211000001104 +326341000 +3227811000001102 +4431511000001108 +377414004 +3228611000001102 +4431211000001105 +326364003 +235311000001105 +312411000001108 +403611000001106 +380211000001105 +22562211000001104 +31364011000001104 +326369008 +3831411000001104 +3545111000001106 +3545011000001105 +3213311000001106 +3215011000001109 +3236411000001104 +4432011000001108 +3233311000001102 +3047311000001102 +17444111000001106 +15473911000001108 +15470011000001100 +22403311000001100 +22311511000001104 +15364711000001108 +15364511000001104 +36602211000001104 diff --git a/codelists/user-elsie_horne-hrt_dmd.csv b/codelists/user-elsie_horne-hrt_dmd.csv new file mode 100644 index 00000000..e32cc2b8 --- /dev/null +++ b/codelists/user-elsie_horne-hrt_dmd.csv @@ -0,0 +1,186 @@ +dmd_id +32936711000001100 +32927411000001104 +325480003 +734211000001107 +526411000001104 +692011000001108 +325481004 +4111411000001103 +4111811000001101 +4111611000001100 +325482006 +3962511000001105 +3963211000001101 +3962811000001108 +11738011000001104 +14778411000001108 +11733811000001108 +8794111000001104 +8752311000001103 +325541008 +3773511000001101 +3774411000001102 +3773711000001106 +27322111000001108 +325546003 +3348711000001103 +3365411000001107 +3454911000001101 +3447411000001102 +10276811000001100 +325577001 +36064911000001104 +325545004 +3346811000001105 +3367011000001105 +3448611000001100 +36065111000001112 +36065511000001104 +36065011000001104 +3664211000001102 +3664111000001108 +36064811000001104 +36065411000001104 +38344311000001104 +2942911000001101 +2889311000001109 +2889611000001104 +2945011000001105 +2937111000001101 +2911111000001100 +2838411000001101 +2842511000001103 +2942311000001102 +2889111000001107 +2903111000001101 +2936011000001100 +3414911000001105 +2948411000001106 +2902911000001105 +2938411000001105 +2949711000001109 +2939611000001104 +2841811000001109 +2837211000001104 +2890211000001107 +2893411000001103 +3657811000001104 +3658611000001104 +9045511000001100 +9045711000001106 +9044911000001108 +9045111000001104 +9045311000001108 +10276611000001104 +38268911000001104 +293111000001101 +3449411000001106 +325505008 +15466311000001108 +3351911000001108 +3355711000001108 +325533008 +3196511000001106 +24659611000001108 +4522411000001109 +3350611000001107 +3359911000001107 +3354111000001108 +2846111000001106 +2845411000001105 +2845811000001107 +2845111000001100 +3404911000001108 +325648008 +3455211000001106 +3448111000001108 +3557411000001105 +3542811000001106 +3542711000001103 +3465411000001100 +3465311000001107 +36065211000001104 +3542611000001107 +325662001 +3369311000001103 +3788211000001104 +3864111000001104 +3788311000001107 +4508511000001104 +4725811000001103 +4339811000001101 +8801211000001108 +21366211000001108 +3351311000001107 +3199011000001109 +3049911000001108 +3358411000001107 +3363611000001106 +3216611000001107 +3217311000001104 +3043111000001107 +3355511000001103 +3349811000001104 +3346111000001103 +3347911000001102 +519311000001107 +21259311000001108 +3040311000001103 +3455511000001109 +3367311000001108 +3049511000001101 +3038811000001109 +3780211000001102 +3853711000001105 +3779911000001106 +4499111000001107 +4711811000001109 +4338711000001100 +8787011000001107 +10280511000001108 +10277411000001100 +325556004 +3456411000001101 +325557008 +7142211000001109 +409118006 +34911000001102 +546511000001102 +11476811000001108 +400674006 +3341011000001106 +409322009 +15621411000001104 +3470811000001103 +557911000001109 +3456711000001107 +679511000001100 +7340311000001105 +326075007 +22567411000001108 +24559611000001104 +21960811000001104 +38744511000001104 +34444711000001100 +22517711000001104 +30086311000001108 +28996211000001108 +30863211000001104 +33612911000001100 +24110811000001108 +33971011000001100 +22109311000001100 +239811000001103 +22358811000001100 +24676811000001104 +32460211000001100 +325568008 +24418911000001104 +38039411000001104 +37088911000001104 +24195511000001100 +37241411000001104 +37825511000001104 +37363111000001104 +521411000001105 diff --git a/project.yaml b/project.yaml index b55de25e..ab5c69df 100644 --- a/project.yaml +++ b/project.yaml @@ -91,6 +91,18 @@ actions: highly_sensitive: cohort: output/input_unvax.csv.gz + ## Implement study_definition for unvax_extf + + generate_study_population_unvax_extf: + run: cohortextractor:latest generate_cohort --study-definition study_definition_unvax_extf + --output-format csv.gz + needs: + - vax_eligibility_inputs + - generate_index_dates + outputs: + highly_sensitive: + cohort: output/input_unvax_extf.csv.gz + ## Preprocess data - prevax preprocess_data_prevax: @@ -151,6 +163,21 @@ actions: cohort: output/input_unvax.rds venn: output/venn_unvax.rds + ## Preprocess data - unvax_extf + + preprocess_data_unvax_extf: + run: r:latest analysis/preprocess_data.R unvax_extf + needs: + - generate_index_dates + - generate_study_population_unvax_extf + outputs: + moderately_sensitive: + describe: output/describe_input_unvax_extf_stage0.txt + describe_venn: output/describe_venn_unvax_extf.txt + highly_sensitive: + cohort: output/input_unvax_extf.rds + venn: output/venn_unvax_extf.rds + ## Data cleaning - all cohorts stage1_data_cleaning_all: @@ -160,7 +187,7 @@ actions: - preprocess_data_prevax_extf - preprocess_data_vax - preprocess_data_unvax - - vax_eligibility_inputs + - preprocess_data_unvax_extf outputs: moderately_sensitive: refactoring: output/meta_data_factors_*.csv @@ -170,38 +197,6 @@ actions: highly_sensitive: cohort: output/input_*.rds - describe_file-input_prevax_stage1: - run: r:latest analysis/describe_file.R input_prevax_stage1 rds - needs: - - stage1_data_cleaning_all - outputs: - moderately_sensitive: - describe_model_input: output/describe-input_prevax_stage1.txt - - describe_file-input_prevax_extf_stage1: - run: r:latest analysis/describe_file.R input_prevax_extf_stage1 rds - needs: - - stage1_data_cleaning_all - outputs: - moderately_sensitive: - describe_model_input: output/describe-input_prevax_extf_stage1.txt - - describe_file-input_vax_stage1: - run: r:latest analysis/describe_file.R input_vax_stage1 rds - needs: - - stage1_data_cleaning_all - outputs: - moderately_sensitive: - describe_model_input: output/describe-input_vax_stage1.txt - - describe_file-input_unvax_stage1: - run: r:latest analysis/describe_file.R input_unvax_stage1 rds - needs: - - stage1_data_cleaning_all - outputs: - moderately_sensitive: - describe_model_input: output/describe-input_unvax_stage1.txt - ## Stage 5 - Run models make_model_input-cohort_prevax-main-addiction: