diff --git a/notebooks_r/report.Rmd b/notebooks_r/report.Rmd index fd28bbd..77a6242 100644 --- a/notebooks_r/report.Rmd +++ b/notebooks_r/report.Rmd @@ -555,7 +555,7 @@ SUS5
-```{r, fig.width=30, fig.height=27} +```{r, fig.width=40, fig.height=27} new_prop_reg_cat <- read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", "simple_patient_counts_categories_5_group_new_sus_registered.csv"), col_types = (cols())) %>% rename_with(~ sub("ethnicity_", "", .), contains("ethnicity_")) %>% @@ -884,11 +884,11 @@ for (codelist in c("new", "ctv3")) { ``` ```{r} -anyrepeated_new +anyrepeated_ctv3 ```
```{r} -anyrepeated_ctv3 +anyrepeated_new ``` ### 16 Group @@ -1044,169 +1044,202 @@ latestcommon_new_16 %>% ```{r} for (codelist in c("new", "ctv3")) { - ifelse(codelist == "new", codelist_name <- "SNOMED:2022", codelist_name <- "CTV3:2020") - ifelse(codelist == "new", codelist_path <- glue("ethnicity_new_5"), codelist_path <- glue("ethnicity_5")) - df_sus_new_cross <- read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_{codelist}_sus_crosstab_long_registered.csv"))) %>% - rename_with(~"ethnicity", contains("ethnicity_") & !contains("sus")) - - ethnicity_cat <- - read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_patient_counts_categories_5_group_{codelist}_sus_registered.csv")), col_types = (cols())) %>% - rename_with(~ sub("ethnicity_", "", .), contains("ethnicity_")) %>% - rename_with(~ sub("_new", "", .), contains("_new")) %>% - rename_with(~ sub("_ctv3", "", .), contains("_ctv3")) %>% - rename_with(~ sub("_5_filled", "", .), contains("_5_filled")) %>% - select(-contains("filled"), -contains("missing"), -contains("sus")) %>% - mutate( - Asian_anydiff = Asian_any - Asian, - Black_anydiff = Black_any - Black, - Mixed_anydiff = Mixed_any - Mixed, - White_anydiff = White_any - White, - Other_anydiff = Other_any - Other, - ) + for (known in c("_known","")){ + ifelse(codelist == "new", codelist_name <- "SNOMED:2022", codelist_name <- "CTV3:2020") + ifelse(codelist == "new", codelist_path <- glue("ethnicity_new_5"), codelist_path <- glue("ethnicity_5")) + + + df_sus_new_cross <- read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_{codelist}_sus_crosstab_long_registered.csv"))) %>% + rename_with(~"ethnicity", contains("ethnicity_") & !contains("sus")) + + if(known == "_known"){df_sus_new_cross <- df_sus_new_cross %>% + filter( + ethnicity != "Unknown", + ethnicity_sus_5 != "Unknown" + ) + } - population <- read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_patient_counts_5_group_{codelist}_sus_registered.csv")), col_types = (cols())) %>% - filter(group == "all") %>% - summarise( - ethnicity = "Unknown", - population = population - !!as.name(glue("{codelist_path}_filled")) - ) - - ethnicity_cat_pivot <- ethnicity_cat %>% - pivot_longer( - cols = c(contains("_")), - names_to = c("ethnicity", "codelist"), - names_pattern = "(.*)_(.*)", - values_to = "n" - ) %>% - filter(codelist == "any", group == "all") %>% + ethnicity_cat <- + read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_patient_counts_categories_5_group_{codelist}_sus_registered.csv")), col_types = (cols())) %>% + rename_with(~ sub("ethnicity_", "", .), contains("ethnicity_")) %>% + rename_with(~ sub("_new", "", .), contains("_new")) %>% + rename_with(~ sub("_ctv3", "", .), contains("_ctv3")) %>% + rename_with(~ sub("_5_filled", "", .), contains("_5_filled")) %>% + select(-contains("filled"), -contains("missing"), -contains("sus")) + + + population <- read_csv(here::here("output", "sus", "simplified_output", "5_group", "tables", glue("simple_patient_counts_5_group_{codelist}_sus_registered.csv")), col_types = (cols())) %>% + filter(group == "all") %>% summarise( - ethnicity, - population = n - ) %>% - bind_rows(population) + ethnicity = "Unknown", + population = population - !!as.name(glue("{codelist_path}_filled")) + ) + + ethnicity_cat_pivot <- ethnicity_cat %>% + filter( group == "all") %>% + select(levels_5) %>% + pivot_longer( + cols = levels_5, + names_to = c("ethnicity"), + values_to = "population" + ) %>% + bind_rows(population) + + + df_sus_new_cross_perc <- df_sus_new_cross %>% + left_join(ethnicity_cat_pivot, by = "ethnicity") %>% + group_by(ethnicity ) %>% + mutate(population=sum(na.omit(`0`))) %>% + ungroup() %>% + mutate( + percentage = round(`0` / population * 100, 1), + ethnicity = fct_relevel( + ethnicity, + levels_5 + ), + ethnicity_sus_5 = fct_relevel( + ethnicity_sus_5, + levels_5 + ), + left_paren = " (", + right_paren = ")", + N = comma(as.numeric(`0`)), + population = comma(as.numeric(population)) + ) %>% + arrange(ethnicity, ethnicity_sus_5) %>% + unite("labl", N, left_paren, percentage, right_paren, sep = "", remove = F) %>% + unite("ethnicity", ethnicity, left_paren, population, right_paren, sep = "") %>% + select(-`0`, -percentage, -N) %>% + pivot_wider(names_from = c("ethnicity_sus_5"), values_from = labl) + + ifelse(known=="_known", + my_cols <- setNames(c(codelist_name, "Asian", "Black", "Mixed", "White", "Other"), names(df_sus_new_cross_perc)), + my_cols <- setNames(c(codelist_name, "Asian", "Black", "Mixed", "White", "Other", "Unknown"), names(df_sus_new_cross_perc))) + + + df_sus_new_cross_table <- df_sus_new_cross_perc %>% + gt(groupname_col = "") %>% + tab_spanner(label = "Primary Care ethnicity", columns = c(1)) %>% + tab_spanner(label = "Secondary Care ethnicity", columns = c(2:ncol(df_sus_new_cross_perc))) %>% + cols_label(!!!my_cols) %>% + tab_style( + style = list( + # cell_fill(color = "gray96") + ), + locations = cells_body() + ) %>% + tab_style( + style = list( + cell_text(weight = "bold") + ), + locations = cells_column_labels(everything()) + ) %>% + tab_options( + table.align = "left", + # row_group.as_column = TRUE option not available on the OS R image + table.font.size = 8, + column_labels.border.top.width = px(3), + column_labels.border.top.color = "transparent", + table.border.top.color = "transparent", + heading.align = "left" + # ) %>% + # tab_header( + # title = md("Table 5: Count of patients with a recorded ethnicity in Secondary Care by ethnicity group (proportion of Primary Care population). All counts are rounded to the nearest 5. "), + ) %>% + tab_options( + data_row.padding = px(0) + ) + + assign(glue("df_sus_new_cross_table_{codelist}{known}"), df_sus_new_cross_table) + + ### sankey plot + + df_secondary_new_cross_perc <- df_sus_new_cross %>% + mutate( + ethnicity = fct_relevel( + ethnicity, + "Unknown", "Other", "White", "Mixed", "Black", "Asian" + ), + ethnicity_sus_5 = fct_relevel( + ethnicity_sus_5, + "Asian", "Black", "Mixed", "White", "Other" + ) + ) + + bennett_pal <- c("#FFB700", "#F20D52", "#FF369C", "#FF7CFE", "#9C54E6", "#5323B3") + + ifelse(known=="",fill_list<-rev(c("#FFD23B", "#808080", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), + fill_list<-rev(c("#FFD23B", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6"))) + + assign(glue("alluvial_{codelist}{known}"), ggplot( + as.data.frame(df_secondary_new_cross_perc), + aes(y = `0`, axis1 = ethnicity, axis2 = ethnicity_sus_5) + ) + + geom_alluvium(aes(fill = ethnicity)) + + geom_stratum(aes(fill = ethnicity_sus_5)) + + # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) + + scale_x_discrete(limits = c("ethnicity", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") + + scale_fill_manual(values = fill_list, na.value = NA) + + # theme_minimal() + + ggtitle("") + + theme( + axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + axis.text.x = element_text(size = 20) + ) + + theme( + panel.background = element_rect(fill = "white"), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank() + ) + + theme( + legend.position = "bottom", + legend.title = element_blank() + ) + + geom_label_repel( + stat = "stratum", + aes( + label = after_stat(stratum), + fill = after_stat(stratum) + ), + colour = "white", + size = 10, + fontface = "bold", + direction = "x", + show.legend = F + )) + } +} +``` - df_sus_new_cross_perc <- df_sus_new_cross %>% - left_join(ethnicity_cat_pivot, by = "ethnicity") %>% - mutate( - percentage = round(`0` / population * 100, 1), - ethnicity = fct_relevel( - ethnicity, - levels_5 - ), - ethnicity_sus_5 = fct_relevel( - ethnicity_sus_5, - levels_5 - ), - left_paren = " (", - right_paren = ")", - N = comma(as.numeric(`0`)), - population = comma(as.numeric(population)) - ) %>% - arrange(ethnicity, ethnicity_sus_5) %>% - unite("labl", N, left_paren, percentage, right_paren, sep = "", remove = F) %>% - unite("ethnicity", ethnicity, left_paren, population, right_paren, sep = "") %>% - select(-`0`, -percentage, -N) %>% - pivot_wider(names_from = c("ethnicity_sus_5"), values_from = labl) +```{r} +df_sus_new_cross_table_ctv3 +``` - my_cols <- setNames(c(codelist_name, "Asian", "Black", "Mixed", "White", "Other", "Unknown"), names(df_sus_new_cross_perc)) - df_sus_new_cross_table <- df_sus_new_cross_perc %>% - gt(groupname_col = "") %>% - tab_spanner(label = "Primary Care ethnicity", columns = c(1)) %>% - tab_spanner(label = "Secondary Care ethnicity", columns = c(2:7)) %>% - cols_label(!!!my_cols) %>% - tab_style( - style = list( - # cell_fill(color = "gray96") - ), - locations = cells_body() - ) %>% - tab_style( - style = list( - cell_text(weight = "bold") - ), - locations = cells_column_labels(everything()) - ) %>% - tab_options( - table.align = "left", - # row_group.as_column = TRUE option not available on the OS R image - table.font.size = 8, - column_labels.border.top.width = px(3), - column_labels.border.top.color = "transparent", - table.border.top.color = "transparent", - heading.align = "left" - # ) %>% - # tab_header( - # title = md("Table 5: Count of patients with a recorded ethnicity in Secondary Care by ethnicity group (proportion of Primary Care population). All counts are rounded to the nearest 5. "), - ) %>% - tab_options( - data_row.padding = px(0) - ) - - assign(glue("df_sus_new_cross_table_{codelist}"), df_sus_new_cross_table) +
- ### sankey plot +```{r, fig.width=15, fig.height=10} +alluvial_ctv3 +``` - df_secondary_new_cross_perc <- df_sus_new_cross %>% - mutate( - ethnicity = fct_relevel( - ethnicity, - "Unknown", "Other", "White", "Mixed", "Black", "Asian" - ), - ethnicity_sus_5 = fct_relevel( - ethnicity_sus_5, - "Asian", "Black", "Mixed", "White", "Other" - ) - ) +
- bennett_pal <- c("#FFB700", "#F20D52", "#FF369C", "#FF7CFE", "#9C54E6", "#5323B3") +```{r} +df_sus_new_cross_table_ctv3_known +``` - assign(glue("alluvial_{codelist}"), ggplot( - as.data.frame(df_secondary_new_cross_perc), - aes(y = `0`, axis1 = ethnicity, axis2 = ethnicity_sus_5) - ) + - geom_alluvium(aes(fill = ethnicity)) + - geom_stratum(aes(fill = ethnicity_sus_5)) + - # geom_text(stat = "stratum", aes(label = after_stat(stratum)), colour = "white",size = 10) + - scale_x_discrete(limits = c("ethnicity", "ethnicity_sus_5"), expand = c(.05, .05), labels = c("ethnicity" = "Primary Care ethnicity", "ethnicity_sus_5" = "Secondary Care ethnicity"), position = "top") + - scale_fill_manual(values = rev(c("#FFD23B", "#808080", "#FF7C00", "#5323B3", "#5A71F3", "#17D7E6")), na.value = NA) + - # theme_minimal() + - ggtitle("") + - theme( - axis.title.y = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank(), - axis.text.x = element_text(size = 20) - ) + - theme( - panel.background = element_rect(fill = "white"), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank() - ) + - theme( - legend.position = "bottom", - legend.title = element_blank() - ) + - geom_label_repel( - stat = "stratum", - aes( - label = after_stat(stratum), - fill = after_stat(stratum) - ), - colour = "white", - size = 10, - fontface = "bold", - direction = "x", - show.legend = F - )) -} +```{r, fig.width=15, fig.height=10} +alluvial_ctv3_known ``` +
```{r} -df_sus_new_cross_table_ctv3 +df_sus_new_cross_table_new ```
@@ -1217,13 +1250,14 @@ alluvial_new
+
+ ```{r} -df_sus_new_cross_table_new +df_sus_new_cross_table_new_known ``` -
```{r, fig.width=15, fig.height=10} -alluvial_new +alluvial_new_known ```
@@ -1231,11 +1265,19 @@ alluvial_new ## 16 Group ```{r} for (codelist in c("new", "ctv3")) { + for (known in c("_known","")){ ifelse(codelist == "new", codelist_name <- "SNOMED:2022", codelist_name <- "CTV3:2020") ifelse(codelist == "new", codelist_path <- glue("ethnicity_new_16"), codelist_path <- glue("ethnicity_16")) + df_sus_new_cross <- read_csv(here::here("output", "sus", "simplified_output", "16_group", "tables", glue("simple_{codelist}_sus_crosstab_long_registered.csv"))) %>% rename_with(~"ethnicity", contains("ethnicity_") & !contains("sus")) + if(known == "_known"){df_sus_new_cross <- df_sus_new_cross %>% + filter( + ethnicity != "Unknown", + ethnicity_sus_16 != "Unknown" + ) + } population <- read_csv(here::here("output", "sus", "simplified_output", "16_group", "tables", glue("simple_patient_counts_16_group_{codelist}_sus_registered.csv")), col_types = (cols())) %>% filter(group == "all") %>% @@ -1252,42 +1294,18 @@ for (codelist in c("new", "ctv3")) { rename_with(~ sub("_new", "", .), contains("_new")) %>% rename_with(~ sub("_ctv3", "", .), contains("_ctv3")) %>% rename_with(~ sub("_16_filled", "", .), contains("_16_filled")) %>% - select(-contains("filled"), -contains("missing"), -contains("sus")) %>% - mutate( - White_British_anydiff = White_British_any - White_British, - White_Irish_anydiff = White_Irish_any - White_Irish, - Other_White_anydiff = Other_White_any - Other_White, - White_and_Black_Caribbean_anydiff = White_and_Black_Caribbean_any - White_and_Black_Caribbean, - White_and_Black_African_anydiff = White_and_Black_African_any - White_and_Black_African, - White_and_Asian_anydiff = White_and_Asian_any - White_and_Asian, - Other_Mixed_anydiff = Other_Mixed_any - Other_Mixed, - Indian_anydiff = Indian_any - Indian, - Pakistani_anydiff = Pakistani_any - Pakistani, - Bangladeshi_anydiff = Bangladeshi_any - Bangladeshi, - Other_Asian_anydiff = Other_Asian_any - Other_Asian, - Caribbean_anydiff = Caribbean_any - Caribbean, - African_anydiff = African_any - African, - Other_Black_anydiff = Other_Black_any - Other_Black, - Chinese_anydiff = Chinese_any - Chinese, - Any_other_ethnic_group_anydiff = Any_other_ethnic_group_any - Any_other_ethnic_group - ) + select(-contains("filled"), -contains("missing"), -contains("sus")) - - - ethnicity_cat_pivot <- ethnicity_cat %>% + ethnicity_cat_pivot <- ethnicity_cat %>% + filter( group == "all") %>% + select(levels_16) %>% pivot_longer( - cols = c(contains("_")), - names_to = c("ethnicity", "codelist"), - names_pattern = "(.*)_(.*)", - values_to = "n" - ) %>% - filter(codelist == "any", group == "all") %>% - summarise( - ethnicity, - population = n - ) %>% + cols = levels_16, + names_to = c("ethnicity"), + values_to = "population" + ) %>% bind_rows(population) - + df_sus_new_cross_perc <- df_sus_new_cross %>% left_join(ethnicity_cat_pivot, by = "ethnicity") %>% @@ -1312,13 +1330,15 @@ for (codelist in c("new", "ctv3")) { select(-`0`, -percentage, -N) %>% pivot_wider(names_from = c("ethnicity_sus_16"), values_from = labl) - my_cols <- setNames(c(codelist_name, "Indian", "Pakistani", "Bangladeshi", "Other Asian", "Caribbean", "African", "Other Black", "White and Black Caribbean", "White and Black African", "White and Asian", "Other Mixed", "White British", "White Irish", "Other White", "Chinese", "Any other ethnic group", "Unknown"), names(df_sus_new_cross_perc)) + ifelse(known=="_known", + my_cols <- setNames(c(codelist_name, "Indian", "Pakistani", "Bangladeshi", "Other Asian", "Caribbean", "African", "Other Black", "White and Black Caribbean", "White and Black African", "White and Asian", "Other Mixed", "White British", "White Irish", "Other White", "Chinese", "Any other ethnic group"), names(df_sus_new_cross_perc)), + my_cols <- setNames(c(codelist_name, "Indian", "Pakistani", "Bangladeshi", "Other Asian", "Caribbean", "African", "Other Black", "White and Black Caribbean", "White and Black African", "White and Asian", "Other Mixed", "White British", "White Irish", "Other White", "Chinese", "Any other ethnic group", "Unknown"), names(df_sus_new_cross_perc))) df_sus_new_cross_table <- df_sus_new_cross_perc %>% mutate(ethnicity = gsub("_", " ", ethnicity)) %>% gt(groupname_col = "") %>% tab_spanner(label = "Primary Care ethnicity", columns = c(1)) %>% - tab_spanner(label = "Secondary Care ethnicity", columns = c(2:7)) %>% + tab_spanner(label = "Secondary Care ethnicity", columns = c(2:ncol(df_sus_new_cross_perc))) %>% cols_label(!!!my_cols) %>% tab_style( style = list( @@ -1348,7 +1368,8 @@ for (codelist in c("new", "ctv3")) { data_row.padding = px(0) ) - assign(glue("df_sus_new_cross_table_{codelist}"), df_sus_new_cross_table) + assign(glue("df_sus_new_cross_table_{codelist}{known}"), df_sus_new_cross_table) + } } ``` @@ -1357,11 +1378,21 @@ df_sus_new_cross_table_ctv3 ```
+```{r} +df_sus_new_cross_table_ctv3_known +``` +
+ ```{r} df_sus_new_cross_table_new ```
+```{r} +df_sus_new_cross_table_new_known +``` + +
# Comparison with the 2021 UK census population @@ -1588,7 +1619,7 @@ ONS_tab_2001 %>% tab_options( data_row.padding = px(0) ) %>% - tab_options(., container.width = 3200) + tab_options(., container.width = 3300) ```