diff --git a/analysis/local/appendix.R b/analysis/local/appendix.R
index f170f7c..13fe497 100644
--- a/analysis/local/appendix.R
+++ b/analysis/local/appendix.R
@@ -73,67 +73,67 @@ SUS_gt %>% gtsave(here::here("output","released","made_locally","patient_counts
-# # patient counts 5 group
-# SUS5yesno <- read_csv(here::here("output","released","made_locally","local_patient_counts_categories_5_registered.csv")) %>%
-#   filter(subgroup == "Yes" | subgroup == "No") %>%
-#   mutate(subgroup =recode(subgroup, Yes = "Present",
-#                           No = "Absent"
-#   )) %>%
-#   arrange(group,rev(subgroup))
-#
-# SUS5<-read_csv(here::here("output","released","made_locally","local_patient_counts_categories_5_registered.csv")) %>%
-#   filter(subgroup != "Yes" & subgroup != "No") %>%
-#   bind_rows(SUS5yesno) %>%
-#   mutate(group = case_when(group == 'age_band' ~ 'age band',
-#                            group == 'learning_disability' ~ 'learning disability',
-#                            group == "imd" ~ "IMD",
-#                            TRUE ~ group),
-#          subgroup =recode(subgroup, F = "Female",
-#                           M = "Male"
-#          )
-#   ) %>%
-#   filter(`Asian 5 SNOMED:2022` !="- (-)")
-#
-# my_cols <- setNames(c("group","",rep(c("SNOMED 2022","SNOMED 2022 with SUS data"),5)),names(SUS5))
-#
-#
-# SUS5 <- SUS5 %>%
-#   gt( groupname_col = "group") %>%
-#   tab_spanner(label="Asian", columns=c(3,4)) %>%
-#   tab_spanner(label="Black", columns=c(5,6)) %>%
-#   tab_spanner(label="Mixed", columns=c(7,8)) %>%
-#   tab_spanner(label="White", columns=c(9,10)) %>%
-#   tab_spanner(label="Other", columns=c(11,12)) %>%
-#   cols_label(!!!my_cols) %>%
-#   tab_style(
-#     style = list(
-#       cell_fill(color = "gray96")
-#     ),
-#     locations = cells_body(
-#     )
-#   ) %>%
-#   tab_style(
-#     style = list(
-#       cell_text(weight = "bold")
-#     ),
-#     locations = cells_column_labels(everything())
-#   ) %>%
-#   tab_options(
-#     table.align = "left",
-#     # row_group.as_column = TRUE option not available on the OS R image
-#     row_group.as_column = TRUE,
-#     table.font.size = 8,
-#     column_labels.border.top.width = px(3),
-#     column_labels.border.top.color = "transparent",
-#     table.border.top.color = "transparent",
-#     heading.align = "left"
-#   ) %>%
-#   tab_header(
-#     title = md("Table 2: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and clinical and demographic subgroups. All counts are rounded to the nearest 5. "),
-#   )
-#
-# SUS5 %>% gtsave(here::here("output","released","made_locally","patient_counts_5_group.html"))
-#
+# patient counts 5 group
+SUS5yesno <- read_csv(here::here("output","released","made_locally","local_patient_counts_categories_5_registered.csv")) %>%
+  filter(subgroup == "Yes" | subgroup == "No") %>%
+  mutate(subgroup =recode(subgroup, Yes = "Present",
+                          No = "Absent"
+  )) %>%
+  arrange(group,rev(subgroup))
+
+SUS5<-read_csv(here::here("output","released","made_locally","local_patient_counts_categories_5_registered.csv")) %>%
+  filter(subgroup != "Yes" & subgroup != "No") %>%
+  bind_rows(SUS5yesno) %>%
+  mutate(group = case_when(group == 'age_band' ~ 'age band',
+                           group == 'learning_disability' ~ 'learning disability',
+                           group == "imd" ~ "IMD",
+                           TRUE ~ group),
+         subgroup =recode(subgroup, F = "Female",
+                          M = "Male"
+         )
+  ) %>%
+  filter(`Asian 5 SNOMED:2022` !="- (-)")
+
+my_cols <- setNames(c("group","",rep(c("SNOMED 2022","SNOMED 2022 with SUS data"),5)),names(SUS5))
+
+
+SUS5 <- SUS5 %>%
+  gt( groupname_col = "group") %>%
+  tab_spanner(label="Asian", columns=c(3,4)) %>%
+  tab_spanner(label="Black", columns=c(5,6)) %>%
+  tab_spanner(label="Mixed", columns=c(7,8)) %>%
+  tab_spanner(label="White", columns=c(9,10)) %>%
+  tab_spanner(label="Other", columns=c(11,12)) %>%
+  cols_label(!!!my_cols) %>%
+  tab_style(
+    style = list(
+      cell_fill(color = "gray96")
+    ),
+    locations = cells_body(
+    )
+  ) %>%
+  tab_style(
+    style = list(
+      cell_text(weight = "bold")
+    ),
+    locations = cells_column_labels(everything())
+  ) %>%
+  tab_options(
+    table.align = "left",
+    # row_group.as_column = TRUE option not available on the OS R image
+    row_group.as_column = TRUE,
+    table.font.size = 8,
+    column_labels.border.top.width = px(3),
+    column_labels.border.top.color = "transparent",
+    table.border.top.color = "transparent",
+    heading.align = "left"
+  ) %>%
+  tab_header(
+    title = md("Table 2: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and clinical and demographic subgroups. All counts are rounded to the nearest 5. "),
+  )
+
+SUS5 %>% gtsave(here::here("output","released","made_locally","patient_counts_5_group.html"))
+
@@ -204,7 +204,7 @@ SUS16 <- SUS16 %>%
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 2: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and clinical and demographic subgroups. All counts are rounded to the nearest 5."),
+    title = md("Table 3: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and clinical and demographic subgroups. All counts are rounded to the nearest 5."),
   )
 
 SUS16 %>% gtsave(here::here("output","released","made_locally","patient_counts_16_group.html"))
@@ -247,7 +247,7 @@ latestcommon <- read_csv(here::here("output","released","made_locally","local_la
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 3: Count of patients’ most frequently recorded ethnicity (proportion of latest ethnicity). "),
+    title = md("Table 4: Count of patients’ most frequently recorded ethnicity (proportion of latest ethnicity). "),
   )
 
 latestcommon %>% gtsave(here::here("output","released","made_locally","latest_frequent.html"))
@@ -270,7 +270,7 @@ listed <- read_csv(here::here("output","released","ethnicity","snomed_ethnicity_
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 8: Count of individual ethnicity code use"),
+    title = md("Table 9: Count of individual ethnicity code use"),
   )
 
 listed %>% gtsave(here::here("output","released","made_locally","percodelist.html"))
@@ -300,14 +300,14 @@ plot_time<- time %>% ggplot(aes(x=Date,y= n,colour=measure))+
   theme(legend.title=element_blank()) +
   scale_x_date(breaks = breakvec, date_labels = "%Y") +
   scale_y_continuous(name="Number of records", labels = scales::comma) +
-  ggtitle("Figure 4: Recording of ethnicity over time for latest and first recorded ethnicity. Unknown dates of recording may be stored as '1900-01-01'")
+  ggtitle("Figure 3: Recording of ethnicity over time for latest and first recorded ethnicity. Unknown dates of recording may be stored as '1900-01-01'")
 
 ggsave(
   filename = here::here(
     "output",
     "released",
     "made_locally",
-    "fig_4_plot_time.pdf"
+    "fig_3_plot_time.pdf"
   ),
   plot_time,
   dpi = 600,
@@ -377,7 +377,7 @@ ONS_tab_2021 %>%
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 7: Count of patients with a recorded ethnicity in OpenSAFELY TPP [amended to the 2021 ethnicity grouping] (proportion of registered TPP population) and 2021 ONS Census counts (proportion of 2021 ONS Census population). All counts are rounded to the nearest 5. "),
+    title = md("Table 8: Count of patients with a recorded ethnicity in OpenSAFELY TPP [amended to the 2021 ethnicity grouping] (proportion of registered TPP population) and 2021 ONS Census counts (proportion of 2021 ONS Census population). All counts are rounded to the nearest 5. "),
   ) %>%
   gtsave(here::here("output","released","made_locally","ons_table_2021_with_2021_categories.html"))
 
@@ -434,7 +434,7 @@ ONS_tab_2001 %>%
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 6: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and 2021 ONS Census counts [amended to 2001 grouping] (proportion of 2021 ONS Census population). All counts are rounded to the nearest 5. "),
+    title = md("Table 7: Count of patients with a recorded ethnicity in OpenSAFELY TPP by ethnicity group (proportion of registered TPP population) and 2021 ONS Census counts [amended to 2001 grouping] (proportion of 2021 ONS Census population). All counts are rounded to the nearest 5. "),
   ) %>%
   gtsave(here::here("output","released","made_locally","ons_table_2021_with_2001_categories.html"))
 
@@ -478,7 +478,7 @@ ethnicity_plot_na_2021 <- ethnicity_na_2021 %>%
   theme(legend.position="bottom", legend.title=element_blank()) +
   geom_text(aes(x=Ethnic_Group,y=percentage,label=ifelse(cohort=="2021 Census","",paste0(round(diff,digits =1),"%"))), size=3.4, position =position_dodge(width=0.9), vjust=0.3,hjust = -0.2) +
-  ggtitle("Figure 3: Barplot showing the proportion of 2021 Census and TPP populations (amended to 2021 grouping) per ethnicity grouped into 5 groups per NUTS-1 region (excluding\nthose without a recorded ethnicity). Annotated with percentage point difference between 2021 Census and TPP populations.") +
+  ggtitle("Figure 2: Barplot showing the proportion of 2021 Census and TPP populations (amended to 2021 grouping) per ethnicity grouped into 5 groups per NUTS-1 region (excluding\nthose without a recorded ethnicity). Annotated with percentage point difference between 2021 Census and TPP populations.") +
   theme(plot.title = element_text(size = 16))
@@ -510,7 +510,7 @@ ethnicity_plot_eng_na_2021 <- ethnicity_na_2021 %>%
   theme(legend.position="bottom", legend.title=element_blank()) +
   geom_text(aes(x=Ethnic_Group,y=percentage,label=ifelse(cohort=="2021 Census","",paste0(round(diff,digits =1),"%"))), size=3.4, position =position_dodge(width=0.9), vjust=0.3 ,hjust = -0.2) +
-  ggtitle("Figure 2: Barplot showing the proportion of 2021 Census and TPP populations (amended to 2021 grouping) per ethnicity grouped into 5 groups (excluding those\nwithout a recorded ethnicity). Annotated with percentage point difference between 2021 Census and TPP populations.") +
+  ggtitle("Figure 1: Barplot showing the proportion of 2021 Census and TPP populations (amended to 2021 grouping) per ethnicity grouped into 5 groups (excluding those\nwithout a recorded ethnicity). Annotated with percentage point difference between 2021 Census and TPP populations.") +
   theme(plot.title = element_text(size = 10))
@@ -584,8 +584,8 @@ my_cols <- setNames(c("","Asian","Black","Mixed", "White","Other","Unknown"),nam
 
 df_sus_new_cross_table <- df_sus_new_cross_perc %>%
   gt( groupname_col = "") %>%
-  tab_spanner(label="SNOMED: 2022", columns=c(1)) %>%
-  tab_spanner(label="SUS", columns=c(2:7)) %>%
+  tab_spanner(label="Primary Care ethnicity", columns=c(1)) %>%
+  tab_spanner(label="Secondary Care ethnicity", columns=c(2:7)) %>%
   cols_label(!!!my_cols) %>%
   tab_style(
     style = list(
@@ -611,7 +611,7 @@ df_sus_new_cross_table <- df_sus_new_cross_perc %>%
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 4: Count of patients with a recorded ethnicity in SUS by ethnicity group (proportion of SNOMED:2022 population). All counts are rounded to the nearest 5. "),
+    title = md("Table 5: Count of patients with a recorded ethnicity in Secondary Care by ethnicity group (proportion of Primary Care population). All counts are rounded to the nearest 5. "),
   )
 
@@ -742,8 +742,8 @@ my_cols <- setNames(c("","Asian","Black","Mixed", "White","Other"),names(df_sus_
 
 df_sus_new_cross_known_table <- df_sus_new_cross_known_perc %>%
   gt( groupname_col = "") %>%
-  tab_spanner(label="SNOMED: 2022", columns=c(1)) %>%
-  tab_spanner(label="SUS", columns=c(2:6)) %>%
+  tab_spanner(label="Primary Care ethnicity", columns=c(1)) %>%
+  tab_spanner(label="Secondary Care ethnicity", columns=c(2:6)) %>%
   cols_label(!!!my_cols) %>%
   tab_style(
     style = list(
@@ -769,7 +769,7 @@ df_sus_new_cross_known_table <- df_sus_new_cross_known_perc %>%
     heading.align = "left"
   ) %>%
   tab_header(
-    title = md("Table 5: Count of patients with a recorded ethnicity in SUS by ethnicity group excluding Unknown ethnicites (proportion of SNOMED:2022 population). All counts are rounded to the nearest 5. "),
+    title = md("Table 6: Count of patients with a recorded ethnicity in Secondary Care by ethnicity group excluding Unknown ethnicities (proportion of Primary Care population). All counts are rounded to the nearest 5. "),
   )
 
@@ -1111,14 +1111,14 @@ prop_reg_cat_plot <- prop_reg_cat_pivot %>%
   ggtitle("Figure 1: Barplot showing proportion of registered TPP population with a recorded ethnicity by clinical and demographic subgroups,\nbased on primary care records (solid bars) and when supplemented with secondary care data (pale bars).")
 
-prop_reg_cat_plot
-
-ggsave(
-  filename =here::here("output","released","made_locally", "completeness_cat.pdf"
-  ),
-  prop_reg_cat_plot,
-  dpi = 600,
-  width = 100,
-  height = 60,
-  units = "cm"
-)
+# prop_reg_cat_plot
+#
+# ggsave(
+#   filename =here::here("output","released","made_locally", "completeness_cat.pdf"
+#   ),
+#   prop_reg_cat_plot,
+#   dpi = 600,
+#   width = 100,
+#   height = 60,
+#   units = "cm"
+# )
diff --git a/analysis/local/local_manuscript_plots.R b/analysis/local/local_manuscript_plots.R
index fe2b47b..4a1e723 100644
--- a/analysis/local/local_manuscript_plots.R
+++ b/analysis/local/local_manuscript_plots.R
@@ -494,25 +494,34 @@ alluvial <- ggplot(
 ) +
   geom_alluvium(aes(fill = ethnicity_new_5)) +
   geom_stratum(aes(fill = ethnicity_sus_5)) +
-  geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white") +
+  # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) +
   scale_x_discrete(limits = c("ethnicity_new_5", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity_new_5" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") +
   scale_fill_manual(values = rev(c("#FFD23B", "#808080", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) +
-  theme_minimal() +
+  #theme_minimal() +
   ggtitle("") +
   theme(
     axis.title.y = element_blank(),
     axis.text.y = element_blank(),
     axis.ticks.y = element_blank(),
-    axis.text.x = element_text(size = 15)
+    axis.text.x = element_text(size = 20)
   ) +
   theme(
+    panel.background = element_rect(fill = "black"),
     panel.grid.major = element_blank(),
     panel.grid.minor = element_blank()
   ) +
   theme(
     legend.position = "bottom",
     legend.title = element_blank()
-  )
+  ) +
+  geom_label_repel(stat = "stratum",
+                   aes(label = after_stat(stratum),
+                       fill = after_stat(stratum)),
+                   colour = "white",
+                   size = 10,
+                   fontface = 'bold',
+                   direction = "x",
+                   show.legend = F)
 
 ggsave(
   filename = here::here(
@@ -527,6 +536,103 @@ ggsave(
   height = 30,
   units = "cm"
 )
+
+##### only discrepancy alluvial
+df_secondary_new_cross_perc <- df_secondary_new_cross_perc %>%
+  mutate(ethnicity_new_5_na = case_when(ethnicity_new_5=="Unknown"~ethnicity_sus_5),
+         ethnicity_new_5_na = fct_relevel(
+           ethnicity_new_5_na,
+           "Unknown", "Other", "White", "Mixed", "Black", "Asian"
+         ))
+
+alluvial_sus_pick <- ggplot(
+  as.data.frame(df_secondary_new_cross_perc),
+  aes(y = `0`, axis1 = ethnicity_new_5, axis2 = ethnicity_sus_5)
+) +
+  geom_alluvium(aes(fill = ethnicity_new_5_na)) +
+  geom_stratum(aes(fill = ethnicity_sus_5)) +
+  # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) +
+  scale_x_discrete(limits = c("ethnicity_new_5", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity_new_5" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") +
+  scale_fill_manual(values = c( "#808080", "#FF7C00", "#5323B3", "#5A71F3","#FFD23B", "#17D7E6"), na.value = NA) +
+  #theme_minimal() +
+  ggtitle("") +
+  theme(
+    axis.title.y = element_blank(),
+    axis.text.y = element_blank(),
+    axis.ticks.y = element_blank(),
+    axis.text.x = element_text(size = 20)
+  ) +
+  theme(
+    panel.background = element_rect(fill = "black"),
+    panel.grid.major = element_blank(),
+    panel.grid.minor = element_blank()
+  ) +
+  theme(
+    legend.position = "bottom",
+    legend.title = element_blank()
+  )
+
+ggsave(
+  filename = here::here(
+    "output",
+    "released",
+    "made_locally",
+    glue("secondary_care_alluvial_sus_pick.png")
+  ),
+  alluvial_sus_pick,
+  dpi = 600,
+  width = 50,
+  height = 30,
+  units = "cm"
+)
+
+# alluvial picked up from SUS data
+##### only discrepancy alluvial
+df_secondary_new_cross_perc <- df_secondary_new_cross_perc %>%
+  mutate(ethnicity_new_5_na = case_when(ethnicity_new_5=="Unknown" & ethnicity_sus_5 !="Unknown" & ethnicity_new_5!=ethnicity_sus_5~ethnicity_new_5))
+
+alluvial_disc <- ggplot(
+  as.data.frame(df_secondary_new_cross_perc),
+  aes(y = `0`, axis1 = ethnicity_new_5, axis2 = ethnicity_sus_5)
+) +
+  geom_alluvium(aes(fill = ethnicity_new_5_na)) +
+  geom_stratum(aes(fill = ethnicity_sus_5)) +
+  # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) +
+  scale_x_discrete(limits = c("ethnicity_new_5", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity_new_5" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") +
+  scale_fill_manual(values = rev(c("#FFD23B", "#808080", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) +
+  #theme_minimal() +
+  ggtitle("") +
+  theme(
+    axis.title.y = element_blank(),
+    axis.text.y = element_blank(),
+    axis.ticks.y = element_blank(),
+    axis.text.x = element_text(size = 20)
+  ) +
+  theme(
+    panel.background = element_rect(fill = "black"),
+    panel.grid.major = element_blank(),
+    panel.grid.minor = element_blank()
+  ) +
+  theme(
+    legend.position = "bottom",
+    legend.title = element_blank()
+  )
+
+ggsave(
+  filename = here::here(
+    "output",
+    "released",
+    "made_locally",
+    glue("secondary_care_alluvial_disc.png")
+  ),
+  alluvial_disc,
+  dpi = 600,
+  width = 50,
+  height = 30,
+  units = "cm"
+)
+
+
 secondary_heat_perc_all_patients <- ggplot(df_secondary_new_cross_perc, aes(ethnicity_sus_5, ethnicity_new_5, fill = percentage)) +
   geom_tile() +
@@ -673,16 +779,15 @@ alluvial_frequent <- ggplot(
 ) +
   geom_alluvium(aes(fill = latest)) +
   geom_stratum(aes(fill = common)) +
-  geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white") +
   scale_x_discrete(limits = c("latest", "common"), expand = c(.05, .05), labels = c("latest" = "Latest recorded ethnicity", "common" = "Most frequently recorded ethnicity"), position = "top") +
   scale_fill_manual(values = rev(c("#FFD23B", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) +
-  theme_minimal() +
+  # theme_minimal() +
   ggtitle("") +
   theme(
     axis.title.y = element_blank(),
     axis.text.y = element_blank(),
     axis.ticks.y = element_blank(),
-    axis.text.x = element_text(size = 15)
+    axis.text.x = element_text(size = 20)
   ) +
   theme(
     panel.grid.major = element_blank(),
@@ -691,7 +796,17 @@ alluvial_frequent <- ggplot(
   theme(
     legend.position = "bottom",
     legend.title = element_blank()
-  )
+  ) +
"black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + theme( + legend.position = "bottom", + legend.title = element_blank() + ) + +ggsave( + filename = here::here( + "output", + "released", + "made_locally", + glue("secondary_care_alluvial_sus_pick.png") + ), + alluvial_sus_pick, + dpi = 600, + width = 50, + height = 30, + units = "cm" +) + +# alluvial picked up from SUS data +##### only discrepancy alluvial +df_secondary_new_cross_perc <- df_secondary_new_cross_perc %>% + mutate(ethnicity_new_5_na = case_when(ethnicity_new_5=="Unknown" & ethnicity_sus_5 !="Unknown" & ethnicity_new_5!=ethnicity_sus_5~ethnicity_new_5)) + +alluvial_disc <- ggplot( + as.data.frame(df_secondary_new_cross_perc), + aes(y = `0`, axis1 = ethnicity_new_5, axis2 = ethnicity_sus_5) +) + + geom_alluvium(aes(fill = ethnicity_new_5_na)) + + geom_stratum(aes(fill = ethnicity_sus_5)) + + # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) + + scale_x_discrete(limits = c("ethnicity_new_5", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity_new_5" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") + + scale_fill_manual(values = rev(c("#FFD23B", "#808080", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) + + #theme_minimal() + + ggtitle("") + + theme( + axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.x = element_text(size = 20) + ) + + theme( + panel.background = element_rect(fill = "black"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + theme( + legend.position = "bottom", + legend.title = element_blank() + ) + +ggsave( + filename = here::here( + "output", + "released", + "made_locally", + glue("secondary_care_alluvial_disc.png") + ), + alluvial_disc, + dpi = 600, + width = 50, + height = 30, + units = "cm" +) + + secondary_heat_perc_all_patients <- ggplot(df_secondary_new_cross_perc, aes(ethnicity_sus_5, ethnicity_new_5, fill = percentage)) + geom_tile() + @@ -673,16 +779,15 @@ alluvial_frequent <- ggplot( ) + geom_alluvium(aes(fill = latest)) + geom_stratum(aes(fill = common)) + - geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white") + scale_x_discrete(limits = c("latest", "common"), expand = c(.05, .05), labels = c("latest" = "Latest recorded ethnicity", "common" = "Most frequently recorded ethnicity"), position = "top") + scale_fill_manual(values = rev(c("#FFD23B", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) + - theme_minimal() + + # theme_minimal() + ggtitle("") + theme( axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), - axis.text.x = element_text(size = 15) + axis.text.x = element_text(size = 20) ) + theme( panel.grid.major = element_blank(), @@ -691,7 +796,15 @@ alluvial_frequent <- ggplot( theme( legend.position = "bottom", legend.title = element_blank() - ) + ) + + geom_label_repel(stat = "stratum", + aes(label = after_stat(stratum), + fill = after_stat(stratum)), + colour = "white", + size = 10, + fontface = 'bold', + direction = "x", + show.legend = F) ggsave( filename = here::here( diff --git a/analysis/sus/lib_phenotype_validation_sus.py b/analysis/sus/lib_phenotype_validation_sus.py index be9c87e..ab9e975 100644 --- a/analysis/sus/lib_phenotype_validation_sus.py +++ b/analysis/sus/lib_phenotype_validation_sus.py @@ -39,7 +39,7 @@ def import_clean( ): # Import df_import = 
     # create csv for output checking
     upset_output_check = df_clean[[comparator_1, comparator_2]]
     upset_output_check[comparator_1] = df_clean[comparator_1].fillna("Unknown")
@@ -384,10 +426,19 @@ def upset(df_clean, output_path, comparator_1, comparator_2,grouping,):
     )
 
     upset.plot()
-    plt.savefig(f"output/{output_path}/{grouping}/figures/upset_{comparator_1}_{comparator_2}.png")
+    plt.savefig(
+        f"output/{output_path}/{grouping}/figures/upset_{comparator_1}_{comparator_2}.png"
+    )
 
 
-def upset_cat(df_clean, output_path, comparator_1, comparator_2, other_vars,grouping,):
+def upset_cat(
+    df_clean,
+    output_path,
+    comparator_1,
+    comparator_2,
+    other_vars,
+    grouping,
+):
     upset_cat_df = pd.DataFrame(df_clean[comparator_1])
     for definition in [comparator_1, comparator_2]:
         for var in other_vars:
@@ -419,142 +470,254 @@ def upset_cat(df_clean, output_path, comparator_1, comparator_2, other_vars,grou
         f"output/{output_path}/{grouping}/figures/upset_category_{comparator_1}_{comparator_2}.png"
     )
 
-def records_over_time(df_clean, definitions, demographic_covariates, clinical_covariates, output_path, filepath,grouping,reg):
+
+def records_over_time(
+    df_clean,
+    definitions,
+    demographic_covariates,
+    clinical_covariates,
+    output_path,
+    filepath,
+    grouping,
+    reg,
+):
     """
     Count the number of records over time
-    
+
     Arguments:
         df_clean: a dataframe that has been cleaned using import_clean()
         definitions: a list of derived variables to be evaluated
-        demographic_covariates: a list of demographic covariates 
+        demographic_covariates: a list of demographic covariates
         clinical_covariates: a list of clinical covariates
        output_path: filepath to the output folder
        filepath: filepath to the output file
-    
+
     Returns:
         .csv file (underlying data)
         .png file (line plot)
     """
     li_df = []
     for definition in definitions:
-        df_grouped = df_clean[[definition+'_date',definition]].groupby(
-            definition+'_date'
-        ).count().reset_index().rename(columns={definition+'_date':'date'}).set_index('date')
+        df_grouped = (
+            df_clean[[definition + "_date", definition]]
+            .groupby(definition + "_date")
+            .count()
+            .reset_index()
+            .rename(columns={definition + "_date": "date"})
+            .set_index("date")
+        )
         li_df.append(redact_round_table(df_grouped))
-    df_all_time = pd.concat(li_df).stack().reset_index().rename(columns={'level_1':'variable',0:'value'})
-    del li_df 
-    
+    df_all_time = (
+        pd.concat(li_df)
+        .stack()
+        .reset_index()
+        .rename(columns={"level_1": "variable", 0: "value"})
+    )
+    del li_df
+
     fig, ax = plt.subplots(figsize=(12, 8))
     fig.autofmt_xdate()
-    sns.lineplot(x = 'date', y = 'value', hue='variable', data = df_all_time, ax=ax).set_title('New records by month')
-    ax.legend().set_title('')
+    sns.lineplot(
+        x="date", y="value", hue="variable", data=df_all_time, ax=ax
+    ).set_title("New records by month")
+    ax.legend().set_title("")
     if len(df_all_time) > 0:
-        df_all_time.to_csv(f'output/{output_path}/{grouping}/tables/records_over_time{filepath}_{reg}.csv')
-        plt.savefig(f'output/{output_path}/{grouping}/figures/records_over_time{filepath}_{reg}.png')
+        df_all_time.to_csv(
+            f"output/{output_path}/{grouping}/tables/records_over_time{filepath}_{reg}.csv"
+        )
+        plt.savefig(
+            f"output/{output_path}/{grouping}/figures/records_over_time{filepath}_{reg}.png"
+        )
 
     for group in demographic_covariates + clinical_covariates:
         for definition in definitions:
-            df_grouped = df_clean[[definition+'_date',definition,group]].groupby(
-                [definition+'_date',group]).count().reset_index().rename(columns={definition+'_date':'date'}).set_index(['date', group])
-            df_time=redact_round_table(df_grouped).reset_index()
+            df_grouped = (
+                df_clean[[definition + "_date", definition, group]]
+                .groupby([definition + "_date", group])
+                .count()
+                .reset_index()
+                .rename(columns={definition + "_date": "date"})
+                .set_index(["date", group])
+            )
+            df_time = redact_round_table(df_grouped).reset_index()
             fig, ax = plt.subplots(figsize=(12, 8))
             fig.autofmt_xdate()
-            sns.lineplot(x = 'date', y = definition, hue=group, data = df_time, ax=ax).set_title(f'{definition} recorded by {group} and month')
-            ax.legend().set_title('')
+            sns.lineplot(
+                x="date", y=definition, hue=group, data=df_time, ax=ax
+            ).set_title(f"{definition} recorded by {group} and month")
+            ax.legend().set_title("")
             if len(df_time) > 0:
-                df_time.to_csv(f'output/{output_path}/{grouping}/tables/records_over_time_{definition}_{group}{filepath}_{reg}.csv')
-                plt.savefig(f'output/{output_path}/{grouping}/figures/records_over_time_{definition}_{group}{filepath}_{reg}.png')
-
-def records_over_time_perc(df_clean, definitions, demographic_covariates, clinical_covariates, output_path, filepath,grouping,reg):
+                df_time.to_csv(
+                    f"output/{output_path}/{grouping}/tables/records_over_time_{definition}_{group}{filepath}_{reg}.csv"
+                )
+                plt.savefig(
+                    f"output/{output_path}/{grouping}/figures/records_over_time_{definition}_{group}{filepath}_{reg}.png"
+                )
+
+
+def records_over_time_perc(
+    df_clean,
+    definitions,
+    demographic_covariates,
+    clinical_covariates,
+    output_path,
+    filepath,
+    grouping,
+    reg,
+):
     """
     Count the number of records over time as a percentage of all records
-    
+
     Arguments:
         df_clean: a dataframe that has been cleaned using import_clean()
         definitions: a list of derived variables to be evaluated
-        demographic_covariates: a list of demographic covariates 
+        demographic_covariates: a list of demographic covariates
         clinical_covariates: a list of clinical covariates
        output_path: filepath to the output folder
        filepath: filepath to the output file
-    
+
     Returns:
         .csv file (underlying data)
         .png file (line plot)
     """
     li_df = []
     for definition in definitions:
-        df_grouped = df_clean[[definition+'_date',definition]].groupby(
-            definition+'_date'
-        ).count().reset_index().rename(columns={definition+'_date':'date'}).set_index('date')
+        df_grouped = (
+            df_clean[[definition + "_date", definition]]
+            .groupby(definition + "_date")
+            .count()
+            .reset_index()
+            .rename(columns={definition + "_date": "date"})
+            .set_index("date")
+        )
         li_df.append(redact_round_table(df_grouped))
-    df_all_time = pd.concat(li_df).stack().reset_index().rename(columns={'level_1':'variable',0:'value'})
-    df_all_time['sum']=df_all_time.groupby('variable', sort=False)['value'].transform('sum')
-    df_all_time['value']=df_all_time['value']/df_all_time['sum']
-    del li_df 
-    
+    df_all_time = (
+        pd.concat(li_df)
+        .stack()
+        .reset_index()
+        .rename(columns={"level_1": "variable", 0: "value"})
+    )
+    df_all_time["sum"] = df_all_time.groupby("variable", sort=False)["value"].transform(
+        "sum"
+    )
+    df_all_time["value"] = df_all_time["value"] / df_all_time["sum"]
+    del li_df
+
     fig, ax = plt.subplots(figsize=(12, 8))
     fig.autofmt_xdate()
-    sns.lineplot(x = 'date', y = 'value', hue='variable', data = df_all_time, ax=ax).set_title('New records by month')
-    ax.legend().set_title('')
+    sns.lineplot(
+        x="date", y="value", hue="variable", data=df_all_time, ax=ax
+    ).set_title("New records by month")
+    ax.legend().set_title("")
     if len(df_all_time) > 0:
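+# NOTE: these keys must match the names in `definitions`, since
+# simple_latest_common_comparison() indexes code_dict[definition]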
+code_dict = {
+    "imd": {
+        0: "Unknown",
+        1: "1 Most deprived",
+        2: "2",
+        3: "3",
+        4: "4",
+        5: "5 Least deprived",
+    },
+    "ethnicity_5": {1: "White", 2: "Mixed", 3: "Asian", 4: "Black", 5: "Other"},
+    "ethnicity_sus_5": {1: "White", 2: "Mixed", 3: "Asian", 4: "Black", 5: "Other"},
+}
+
+# Other variables to include
+other_vars = ["white", "mixed", "asian", "black", "other"]
+other_vars_combined = [x + "_" + y for x in definitions for y in other_vars]
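+# e.g. "ethnicity_5_white", "ethnicity_5_mixed", ..., "ethnicity_sus_5_other"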
+
+# Restrict to registered as of index date
+registered = True
+reg = "fullset"
+if registered == True:
+    reg = "registered"
+
+# Dates
+dates = False
+date_min = ""
+date_max = ""
+time_delta = ""
+
+# Min/max range
+min_range = 4
+max_range = 200
+
+# Null value – could be multiple values in a list [0,'0',NA]
+null = [0, "0"]
+
+# Covariates
+demographic_covariates = ["age_band", "sex", "region", "imd"]
+clinical_covariates = ["dementia", "diabetes", "hypertension", "learning_disability"]
+
+# Output filepath
+output_path = "sus/simplified_output"
+grouping = "5_group"
+
+########################## SPECIFY ANALYSES TO RUN HERE ##############################
+
+
+def main():
+    # combine definitions and other_vars
+    df_clean = import_clean(
+        input_path,
+        definitions,
+        other_vars_combined,
+        demographic_covariates,
+        clinical_covariates,
+        reg,
+        null,
+        date_min,
+        date_max,
+        time_delta,
+        output_path,
+        grouping,
+        code_dict,
+        dates=False,
+        registered=registered,
+        dates_check=False,
+    )
+    # Count patients with records
+    simple_patient_counts(
+        df_clean,
+        definitions,
+        reg,
+        demographic_covariates,
+        clinical_covariates,
+        output_path,
+        grouping,
+    )
+
+    # Count patients by categories
+    simple_patient_counts(
+        df_clean,
+        definitions,
+        reg,
+        demographic_covariates,
+        clinical_covariates,
+        output_path,
+        grouping,
+        categories=True,
+    )
+
+    simple_ctv3_sus_crosstab(df_clean, output_path, grouping, reg)
+
+    # Latest v most common
+    simple_latest_common_comparison(
+        df_clean,
+        definitions,
+        reg,
+        other_vars_combined,
+        output_path,
+        grouping,
+        code_dict,
+    )
+
+    # State change
+    simple_state_change(
+        df_clean,
+        definitions,
+        reg,
+        other_vars_combined,
+        output_path,
+        grouping,
+    )
+    # # records over time
+    # records_over_time_perc(
+    #     df_clean, definitions, demographic_covariates, clinical_covariates, output_path, "",grouping,reg
+    # )
+
+
+########################## DO NOT EDIT – RUNS SCRIPT ##############################
+
+if __name__ == "__main__":
+    main()
+
+# registered = False
+# reg = "fullset"
+
+# if __name__ == "__main__":
+#     main()
diff --git a/project.yaml b/project.yaml
index ceb3801..2f1e286 100644
--- a/project.yaml
+++ b/project.yaml
@@ -90,6 +90,16 @@ actions:
         tables: output/sus/simplified_output/5_group/tables/*.csv
 # figures: output/sus/simplified_output/5_group/figures/*.png
 
+  execute_simple_validation_analyses_ctv3_sus:
+    run: python:latest python analysis/sus/simple_validation_script_ctv3_sus.py
+    needs: [generate_study_population_5]
+    outputs:
+      moderately_sensitive:
+        tables: output/sus/simplified_output/5_group/tables/*.csv
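+        # NOTE (assumption): this glob also matches the tables written by the
+        # 5-group action above; a separate output directory may be needed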
= "" +time_delta = "" + +# Min/max range +min_range = 4 +max_range = 200 + +# Null value – could be multiple values in a list [0,'0',NA] +null = [0, "0"] + +# Covariates +demographic_covariates = ["age_band", "sex", "region", "imd"] +clinical_covariates = ["dementia", "diabetes", "hypertension", "learning_disability"] + +# Output filepath +output_path = "sus/simplified_output" +grouping = "5_group" + +########################## SPECIFY ANALYSES TO RUN HERE ############################## + + +def main(): + # combine defintions and other_vars + df_clean = import_clean( + input_path, + definitions, + other_vars_combined, + demographic_covariates, + clinical_covariates, + reg, + null, + date_min, + date_max, + time_delta, + output_path, + grouping, + code_dict, + dates=False, + registered=registered, + dates_check=False, + ) + # Count patients with records + simple_patient_counts( + df_clean, + definitions, + reg, + demographic_covariates, + clinical_covariates, + output_path, + grouping, + ) + + # Count patients by categories + simple_patient_counts( + df_clean, + definitions, + reg, + demographic_covariates, + clinical_covariates, + output_path, + grouping, + categories=True, + ) + + simple_sus_crosstab(df_clean, output_path, grouping, reg) + + # Latest v most common + simple_latest_common_comparison( + df_clean, + definitions, + reg, + other_vars_combined, + output_path, + grouping, + code_dict, + ) + + # State change + simple_state_change( + df_clean, + definitions, + reg, + other_vars_combined, + output_path, + grouping, + ) + # # records over time + # records_over_time_perc( + # df_clean, definitions, demographic_covariates, clinical_covariates, output_path, "",grouping,reg + # ) + + +########################## DO NOT EDIT – RUNS SCRIPT ############################## + +if __name__ == "__main__": + main() + +# registered = False +# reg = "fullset" + +# if __name__ == "__main__": +# main() diff --git a/project.yaml b/project.yaml index ceb3801..2f1e286 100644 --- a/project.yaml +++ b/project.yaml @@ -90,6 +90,14 @@ actions: tables: output/sus/simplified_output/5_group/tables/*.csv # figures: output/sus/simplified_output/5_group/figures/*.png + execute_simple_validation_analyses_ctv3_sus: + run: python:latest python analysis/sus/simple_validation_script_sus.py + needs: [generate_study_population_5] + outputs: + moderately_sensitive: + tables: output/sus/simplified_output/5_group/tables/*.csv +# figures: output/sus/simplified_output/5_group/figures/*.png + execute_simple_validation_analyses_16_sus: run: python:latest python analysis/sus/simple_validation_script_16_sus.py needs: [generate_study_population_16]