From 92b6fc7658fa4e32b67cf05542c4f8e7c29ff433 Mon Sep 17 00:00:00 2001
From: andrewscolm <73839417+andrewscolm@users.noreply.github.com>
Date: Fri, 23 Feb 2024 16:36:46 +0000
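Subject: [PATCH] Clarify the source of saved files

Rename the local_* functions in analysis/report_tables.py, and the CSV
files they write, to use a report_* prefix. Update
notebooks_r/report.Rmd to read the renamed files.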
---
 analysis/report_tables.py | 56 +++++++++++++++++++--------------------
 notebooks_r/report.Rmd    | 42 ++++++++++++++---------------
 2 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/analysis/report_tables.py b/analysis/report_tables.py
index 2d6c435..93a2d4d 100644
--- a/analysis/report_tables.py
+++ b/analysis/report_tables.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 
-def local_patient_counts(
+def report_patient_counts(
     definitions,
     group,
     defin,
@@ -140,21 +140,21 @@ def local_patient_counts(
 
     if categories:
         df_patient_counts.to_csv(
-            f"output/{output_path}/local_patient_counts_categories_{group}_{defin}_registered.csv"
+            f"output/{output_path}/report_patient_counts_categories_{group}_{defin}_registered.csv"
         )
         print(
-            f"saved: output/{output_path}/local_patient_counts_categories_{group}_{defin}_registered.csv"
+            f"saved: output/{output_path}/report_patient_counts_categories_{group}_{defin}_registered.csv"
         )
     else:
         df_patient_counts.to_csv(
-            f"output/{output_path}/local_patient_counts_{group}_{defin}_registered.csv"
+            f"output/{output_path}/report_patient_counts_{group}_{defin}_registered.csv"
         )
         print(
-            f"saved: output/{output_path}/local_patient_counts_{group}_{defin}_registered.csv"
+            f"saved: output/{output_path}/report_patient_counts_{group}_{defin}_registered.csv"
         )
 
 
-def local_latest_common(
+def report_latest_common(
     definitions,
     input_path,
     output_path,
@@ -222,7 +222,7 @@ def local_latest_common(
         df_out = df_out.set_index(f"Latest Ethnicity-\n{definition_dict[definition]}")
         df_out = df_out.replace(np.nan, "-")
         df_out.to_csv(
-            f"output/{output_path}/local_latest_common_{definition}_registered.csv"
+            f"output/{output_path}/report_latest_common_{definition}_registered.csv"
         )
 
         if code_dict != "":
@@ -255,11 +255,11 @@ def local_latest_common(
         )
         df_sum = df_sum.set_index(f"Latest Ethnicity-\n{definition_dict[definition]}")
         df_sum.to_csv(
-            f"output/{output_path}/local_latest_common_{defin}_{group}_expanded_registered.csv"
+            f"output/{output_path}/report_latest_common_{defin}_{group}_expanded_registered.csv"
         )
 
 
-def local_state_change(
+def report_state_change(
     definitions,
     input_path,
     output_path,
@@ -313,7 +313,7 @@ def local_state_change(
             f"Latest Ethnicity-\n{definition_dict[definition]}"
         )
         df_state_change.to_csv(
-            f"output/{output_path}/local_state_change_{defin}_{group}_registered.csv"
+            f"output/{output_path}/report_state_change_{defin}_{group}_registered.csv"
         )
 
 
@@ -502,7 +502,7 @@ def local_state_change(
 
 
 def main():
-    local_patient_counts(
+    report_patient_counts(
         definitions_sus_5,
         group_5,
         "new_sus",
@@ -513,7 +513,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_sus_5,
         group_5,
         "new_sus",
@@ -524,7 +524,7 @@ def main():
         categories=True,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_sus_5_ctv3,
         group_5,
         "ctv3_sus",
@@ -535,7 +535,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_5,
         group_5,
         "new_ctv3",
@@ -546,7 +546,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_supplemented_ctv3_5,
         group_5,
         "ctv3_sus",
@@ -559,7 +559,7 @@ def main():
     )
 
     #### 16 group
-    local_patient_counts(
+    report_patient_counts(
         definitions_sus_16,
         group_16,
         "new_sus",
@@ -570,7 +570,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_sus_16_ctv3,
         group_16,
         "ctv3_sus",
@@ -581,7 +581,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_16,
         group_16,
         "new_ctv3",
@@ -592,7 +592,7 @@ def main():
         categories=False,
         missing=False,
     )
-    local_patient_counts(
+    report_patient_counts(
         definitions_supplemented_ctv3_16,
         group_16,
         "ctv3_sus",
@@ -604,7 +604,7 @@ def main():
         missing=False,
     )
 
-    local_patient_counts(
+    report_patient_counts(
         definitions_supplemented_16,
         group_16,
         "new_sus",
@@ -616,7 +616,7 @@ def main():
         missing=False,
     )
     #  Most recent vs most common
-    local_latest_common(
+    report_latest_common(
         ["ethnicity_new_5"],
         input_path_new,
         output_path,
@@ -626,7 +626,7 @@ def main():
         definition_dict_new,
         suffix,
     )
-    local_latest_common(
+    report_latest_common(
         ["ethnicity_new_16"],
         input_path_new,
         output_path,
@@ -636,7 +636,7 @@ def main():
         definition_dict_new,
         suffix,
     )
-    local_latest_common(
+    report_latest_common(
         ["ethnicity_5"],
         input_path_new,
         output_path,
@@ -646,7 +646,7 @@ def main():
         definition_dict_ctv3,
         suffix,
     )
-    local_latest_common(
+    report_latest_common(
         ["ethnicity_16"],
         input_path_new,
         output_path,
@@ -657,7 +657,7 @@ def main():
         suffix,
     )
     #  Most recent vs any
-    local_state_change(
+    report_state_change(
         ["ethnicity_new_5"],
         input_path_new,
         output_path,
@@ -666,7 +666,7 @@ def main():
         code_dict_5,
         definition_dict_new,
     )
-    local_state_change(
+    report_state_change(
         ["ethnicity_new_16"],
         input_path_new,
         output_path,
@@ -675,7 +675,7 @@ def main():
         code_dict_16,
         definition_dict_new,
     )
-    local_state_change(
+    report_state_change(
         ["ethnicity_5"],
         input_path_new,
         output_path,
@@ -684,7 +684,7 @@ def main():
         code_dict_5,
         definition_dict_new,
     )
-    local_state_change(
+    report_state_change(
         ["ethnicity_16"],
         input_path_new,
         output_path,
diff --git a/notebooks_r/report.Rmd b/notebooks_r/report.Rmd
index eb5c47a..e2cc9e4 100644
--- a/notebooks_r/report.Rmd
+++ b/notebooks_r/report.Rmd
@@ -173,7 +173,7 @@ Around `r codelist_counts %>% filter(group=="all")%>%pull(all_filled)/1000000` m
 ```{r}
 # # patient counts
 ### New SUS
-new_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_new_sus_registered.csv")) %>%
+new_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_new_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -181,7 +181,7 @@ new_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "loca
   )) %>%
   arrange(group, rev(subgroup))
 
-new_sus_prop_table <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_new_sus_registered.csv")) %>%
+new_sus_prop_table <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_new_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(new_sus_prop_table_yesno) %>%
   mutate(
@@ -205,7 +205,7 @@ new_sus_prop_table <- read_csv(here::here("output", "report_tables", "local_pati
 
 
 #### CTV3 SUS
-ctv3_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_ctv3_sus_registered.csv")) %>%
+ctv3_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_ctv3_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -213,7 +213,7 @@ ctv3_sus_prop_table_yesno <- read_csv(here::here("output", "report_tables", "loc
   )) %>%
   arrange(group, rev(subgroup))
 
-ctv3_sus_prop_table <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_ctv3_sus_registered.csv")) %>%
+ctv3_sus_prop_table <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_ctv3_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(ctv3_sus_prop_table_yesno) %>%
   mutate(
@@ -236,7 +236,7 @@ ctv3_sus_prop_table <- read_csv(here::here("output", "report_tables", "local_pat
   select(-`all filled`)
 
 #### CTV3 New
-new_ctv3_prop_table_yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_new_ctv3_registered.csv")) %>%
+new_ctv3_prop_table_yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_new_ctv3_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -244,7 +244,7 @@ new_ctv3_prop_table_yesno <- read_csv(here::here("output", "report_tables", "loc
   )) %>%
   arrange(group, rev(subgroup))
 
-new_ctv3_prop_table <- read_csv(here::here("output", "report_tables", "local_patient_counts_5_new_ctv3_registered.csv")) %>%
+new_ctv3_prop_table <- read_csv(here::here("output", "report_tables", "report_patient_counts_5_new_ctv3_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(new_ctv3_prop_table_yesno) %>%
   mutate(
@@ -446,7 +446,7 @@ The SNOMED:2022 is the most well-populated codelist for White (`r label_number_n
 
 ```{r}
 # patient counts 5 group
-new_SUS5yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_5_new_sus_registered.csv")) %>%
+new_SUS5yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_5_new_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -454,7 +454,7 @@ new_SUS5yesno <- read_csv(here::here("output", "report_tables", "local_patient_c
   )) %>%
   arrange(group, rev(subgroup))
 
-new_SUS5 <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_5_new_sus_registered.csv")) %>%
+new_SUS5 <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_5_new_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(new_SUS5yesno) %>%
   mutate(
@@ -472,7 +472,7 @@ new_SUS5 <- read_csv(here::here("output", "report_tables", "local_patient_counts
   filter(`Asian 5 SNOMED:2022` != "- (-)")
 
 ### CTV3
-ctv3_SUS5yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_5_ctv3_sus_registered.csv")) %>%
+ctv3_SUS5yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_5_ctv3_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -480,7 +480,7 @@ ctv3_SUS5yesno <- read_csv(here::here("output", "report_tables", "local_patient_
   )) %>%
   arrange(group, rev(subgroup))
 
-ctv3_SUS5 <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_5_ctv3_sus_registered.csv")) %>%
+ctv3_SUS5 <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_5_ctv3_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(ctv3_SUS5yesno) %>%
   mutate(
@@ -709,7 +709,7 @@ codelist_counts_categories_16 <- read_csv(here::here("output", "simplified_outpu
 In the 16 group ethnicity the Other ethnic group is expanded to Chinese and Any other ethnic group. For Chinese the SNOMED:2022 codelist is most well-populated (`r label_number_n(signif(codelist_counts_categories_16 %>% filter(group=="all")%>% pull(Chinese_ethnicity_new_16_filled),3))`) and for Any other ethnic group the CTV3:2020 codelist is most well populated (`r label_number_n(signif(codelist_counts_categories_16 %>% filter(group=="all")%>% pull(Any_other_ethnic_group_ethnicity_16_filled),3))`) .
 
 ```{r, fig.width=12, fig.height=18}
-new_SUS16yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_16_new_sus_registered.csv")) %>%
+new_SUS16yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_16_new_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -718,7 +718,7 @@ new_SUS16yesno <- read_csv(here::here("output", "report_tables", "local_patient_
 
 # patient counts 16 group
 # New Snomed codelist
-new_SUS16 <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_16_new_sus_registered.csv")) %>%
+new_SUS16 <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_16_new_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(new_SUS16yesno) %>%
   mutate(
@@ -736,7 +736,7 @@ new_SUS16 <- read_csv(here::here("output", "report_tables", "local_patient_count
   filter(`Indian 16 SNOMED:2022` != "- (-)")
 
 ### CTV3 codelist
-ctv3_SUS16yesno <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_16_ctv3_sus_registered.csv")) %>%
+ctv3_SUS16yesno <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_16_ctv3_sus_registered.csv")) %>%
   filter(subgroup == "Yes" | subgroup == "No") %>%
   mutate(subgroup = recode(subgroup,
     Yes = "Present",
@@ -744,7 +744,7 @@ ctv3_SUS16yesno <- read_csv(here::here("output", "report_tables", "local_patient
   ))
 
 # patient counts 16 group
-ctv3_SUS16 <- read_csv(here::here("output", "report_tables", "local_patient_counts_categories_16_ctv3_sus_registered.csv")) %>%
+ctv3_SUS16 <- read_csv(here::here("output", "report_tables", "report_patient_counts_categories_16_ctv3_sus_registered.csv")) %>%
   filter(subgroup != "Yes" & subgroup != "No") %>%
   bind_rows(ctv3_SUS16yesno) %>%
   mutate(
@@ -830,13 +830,13 @@ SUS16
 ## Changes in coded ethnicity groups
 ### 5 Group
 ```{r}
-change_new <- read_csv(here::here("output", "report_tables", glue("local_state_change_new_5_registered.csv"))) %>%
+change_new <- read_csv(here::here("output", "report_tables", glue("report_state_change_new_5_registered.csv"))) %>%
   rename(
     latest = contains("Latest Ethnicity"),
     discordant = contains("Supplemented")
   )
 
-change_ctv3 <- read_csv(here::here("output", "report_tables", glue("local_state_change_ctv3_5_registered.csv"))) %>%
+change_ctv3 <- read_csv(here::here("output", "report_tables", glue("report_state_change_ctv3_5_registered.csv"))) %>%
   rename(
     latest = contains("Latest Ethnicity"),
     discordant = contains("Supplemented")
@@ -849,7 +849,7 @@ Patients whose latest recorded ethnicity were grouped as Mixed were most likely
 for (codelist in c("new", "ctv3")) {
   ifelse(codelist == "new", codelist_name <- "SNOMED:2022", codelist_name <- "CTV3:2020")
 
-  anyrepeated <- read_csv(here::here("output", "report_tables", glue("local_state_change_{codelist}_5_registered.csv"))) %>%
+  anyrepeated <- read_csv(here::here("output", "report_tables", glue("report_state_change_{codelist}_5_registered.csv"))) %>%
     rename_with(~ gsub("SNOMED:2022 ", "", .)) %>%
     rename_with(str_to_title) %>%
     rename(codelist = contains("Latest Ethnicity")) %>%
@@ -888,13 +888,13 @@ anyrepeated_ctv3
 ### 16 Group
 
 ```{r}
-change_new_16 <- read_csv(here::here("output", "report_tables", glue("local_state_change_new_16_registered.csv"))) %>%
+change_new_16 <- read_csv(here::here("output", "report_tables", glue("report_state_change_new_16_registered.csv"))) %>%
   rename(
     latest = contains("Latest Ethnicity"),
     discordant = contains("Supplemented")
   )
 
-change_ctv3_16 <- read_csv(here::here("output", "report_tables", glue("local_state_change_ctv3_16_registered.csv"))) %>%
+change_ctv3_16 <- read_csv(here::here("output", "report_tables", glue("report_state_change_ctv3_16_registered.csv"))) %>%
   rename(
     latest = contains("Latest Ethnicity"),
     discordant = contains("Supplemented")
@@ -908,7 +908,7 @@ Patients whose latest recorded ethnicity were grouped as Other Black were most l
 for (codelist in c("new", "ctv3")) {
   ifelse(codelist == "new", codelist_name <- "SNOMED:2022", codelist_name <- "CTV3:2020")
 
-  anyrepeated <- read_csv(here::here("output", "report_tables", glue("local_state_change_{codelist}_16_registered.csv"))) %>%
+  anyrepeated <- read_csv(here::here("output", "report_tables", glue("report_state_change_{codelist}_16_registered.csv"))) %>%
     rename_with(~ gsub("SNOMED:2022 ", "", .)) %>%
     rename_with(~ gsub("_", " ", .)) %>%
     rename_with(str_to_title) %>%
@@ -974,7 +974,7 @@ for (codelist in c("new", "ctv3")) {
     discordant <- discordant_full %>%
       select(ethnicity, discordantcombined)
 
-    latestcommon <- read_csv(here::here("output", "report_tables", glue("local_latest_common_{codelist}_{group}_expanded_registered.csv"))) %>%
+    latestcommon <- read_csv(here::here("output", "report_tables", glue("report_latest_common_{codelist}_{group}_expanded_registered.csv"))) %>%
       rename_with(str_to_title) %>%
       rename(ethnicity = contains("Latest Ethnicity")) %>%
       inner_join(discordant, by = "ethnicity") %>%