Skip to content

Commit

Permalink
Merge pull request #82 from opensafely/adding-u16-figure-generation
Browse files Browse the repository at this point in the history
Adding generation of figures for kids vaccines
  • Loading branch information
LisaHopcroft authored Jan 4, 2023
2 parents 0d3069b + 453bbd9 commit cbd2665
Show file tree
Hide file tree
Showing 2 changed files with 284 additions and 0 deletions.
275 changes: 275 additions & 0 deletions analysis/generate_u16_publication_objects.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
library(ggplot2)
library(dplyr)
library(here)
library(readr)
library(tidyr)
library(dplyr)
library(data.table)
library(cowplot)
library(RColorBrewer)
library(purrr)
library(stringr)
library(glue)
library(colorspace)

### Find the data files
data_location = here::here("interim-outputs", "u16", "tables")
files_to_read = dir(data_location, pattern = "^Cumulative vaccination figures *", full.names = TRUE)

###  Combine the data files
concatenate_data_files = function(x, columns = c("src", "category", "group", "vaccinated", "total", "percent")) {
src_string = x %>%
basename() %>%
str_replace(".* among ", "") %>%
str_replace(" population.csv", "")
tmp = read_csv(x) %>% mutate(src = src_string)
return(tmp[columns])
}

combined_data = files_to_read %>%
map(concatenate_data_files) %>%
rbindlist() %>%
as.data.frame()

### Write out the table
output_table = combined_data %>%
tidyr::pivot_wider(
names_from = "src",
values_from = c("vaccinated", "total", "percent"),
id_cols = c("category", "group")
) %>%
mutate(category = category %>%
str_replace("_6_groups", " (broad categories)") %>%
str_replace("_16_groups", " (detailed categories)") %>%
str_replace_all("_", " ") %>%
str_to_sentence() %>%
str_replace("Imd categories", "IMD"))

output_table = output_table[c(
"category", "group",
"vaccinated_12-15", "total_12-15", "percent_12-15",
"vaccinated_5-11", "total_5-11", "percent_5-11"
)]

new_column_names = output_table %>%
colnames() %>%
str_replace("_(\\d+-\\d+)", " \\(\\1\\)") %>%
str_replace("vaccinated", "Vaccinated (n)") %>%
str_replace("total", "Total eligible (n)") %>%
str_replace("percent", "Vaccinated (%)") %>%
str_replace( "category", "Category" ) %>%
str_replace( "group", "Group" )

new_column_names = new_column_names[
c(
new_column_names %>% str_detect("Category") %>% which(),
new_column_names %>% str_detect("Group") %>% which(),
new_column_names %>% str_detect("12-15") %>% which(),
new_column_names %>% str_detect("5-11") %>% which()
)
]

colnames(output_table) = new_column_names

output_table %>% write_csv(here::here("interim-outputs", "u16", "tables", "u16_publication_table.csv"))

### Draw a figure

fig_data = combined_data %>%
filter(category != "Brand of first dose") %>%
select(src, category, group, percent) %>%
rename(Cohort = src) %>%
mutate(percent_rounded = sprintf("%.1f", percent))


palette_name = "Paired"

fill_colours = list(
"12-15" = brewer.pal(9,"YlOrRd")[5] %>% lighten,
"5-11" = brewer.pal(9, "YlGnBu")[5] %>% lighten
# "12-15" = brewer.pal(3,"Paired")[2] %>% lighten,
# "5-11" = brewer.pal(3,"Paired")[1] %>% lighten
)



# ### COHORTS side by side

demographic_plots <- lapply(split(fig_data, fig_data$category), function(x) {
ggplot(x, aes(x = group, y = percent, fill = Cohort)) +
geom_col(position = "dodge") +
ylim(0, 100) +
scale_fill_manual(values=fill_colours) +
theme_minimal() +
theme( axis.text.x = element_text(size=8) )+
ylab(NULL) +
xlab(NULL)
})

legend <- get_legend(
# create some space to the left of the legend
demographic_plots$overall +
guides(color = guide_legend(nrow = 1)) +
theme(legend.position = "bottom")
)

### COHORTS combined

max_y = combined_data %>% pull(percent) %>% max %>% round( digits = -1 )


fig_data_focus = fig_data %>% filter( category != "overall" )
overall_values = fig_data %>% filter( category == "overall" )


fig_data_focus = fig_data_focus %>%
mutate(group = group %>%
str_replace( "F$", "F\n ") %>%
str_replace( "M$", "M\n ") %>%
str_replace("risk group", "\n risk group") %>%
str_replace("South Asian", "South\n Asian" ) %>%
str_replace("Most deprived", "\nMost deprived") %>%
str_replace("Least deprived", "\nLeast deprived"))



### Version 1: without percentage labels

overall_colour <- grey.colors(7)[3]
these_plot_margins <- unit(c(25, 5, 5, 5), "points")
top_plot_margins = unit(c(25, 5, 2, 5), "points")


demographic_plots_combined <- lapply(split(fig_data_focus, fig_data_focus$category), function(x) {
this_plot = ggplot(x, aes(x = group, y = percent, fill = Cohort)) +
# "Overall" annotation - line
geom_hline(data = overall_values, aes(yintercept = percent), colour = overall_colour, linetype = "dashed") +
# Data
geom_col() +
facet_wrap(~Cohort, scales = "free_y") +
scale_fill_manual(values = fill_colours) +
# "Overall" annotation - label
geom_text(data = overall_values, aes(x = 0, y = percent, label = percent_rounded), size = 3, hjust = 0, vjust = -0.5, colour = overall_colour) +
# Theme/visuals
theme_minimal() +
theme(
axis.text.x = element_text(size = 8),
axis.title.y = element_text(face="bold"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
strip.text = element_blank(),
plot.margin = these_plot_margins
) +
ylab("Percent") +
xlab(NULL) +
### Stop text labels falling off the top
coord_cartesian(clip = "off")

})

demographic_plots_combined_annotated = demographic_plots_combined


top_row_combined_annotated <- plot_grid(
demographic_plots_combined_annotated$sex + theme(legend.position = "none",
plot.margin = top_plot_margins),
demographic_plots_combined_annotated$risk_status + theme(legend.position = "none",
plot.margin = top_plot_margins),
ncol = 2,
rel_widths = c(1, 1),
labels = c("Sex", "Risk status"),
label_size = 14,
label_x = 0.05
)

demographics_matrix_combined_annotated <- plot_grid(top_row_combined_annotated,
demographic_plots_combined_annotated$imd_categories + theme(legend.position = "none"),
demographic_plots_combined_annotated$ethnicity_6_groups + theme(legend.position = "none"),
demographic_plots_combined_annotated$ethnicity_16_groups + theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1)),
legend,
rel_heights = c(2, 2, 2, 4, 0.2),
ncol = 1,
labels = c("", "Index of multiple deprivation", "Ethnicity (broad categories)", "Ethnicity (detailed categories)", ""),
label_size = 14,
label_x = 0.05,
hjust = 0
)

save_plot(here::here("interim-outputs", "u16", "figures", "u16_publication_figure.png"),
# save_plot("tmp.png",
demographics_matrix_combined_annotated,
base_width = 9, base_height = 10
)



### Version 2: Add percentage labels

fig_data_focus = fig_data_focus %>%
mutate(percent_rounded = sprintf("%.1f", percent))


demographic_plots_combined <- lapply(split(fig_data_focus, fig_data_focus$category), function(x) {
this_plot <- ggplot(x, aes(x = group, y = percent, fill = Cohort)) +
# "Overall" annotation - line
geom_hline(data = overall_values, aes(yintercept = percent), colour = overall_colour, linetype = "dashed") +
# Data
geom_col() +
facet_wrap(~Cohort, scales = "free_y") +
scale_fill_manual(values = fill_colours) +
# "Overall" annotation - label
geom_text(data = overall_values, aes(x = 0, y = percent, label = percent_rounded), size = 3, hjust = 0, vjust = -0.5, colour = overall_colour) +
geom_text( aes(label=percent_rounded), y=0, angle=90, size=3, hjust=-0.4 ) +
# Theme/visuals
theme_minimal() +
theme(
axis.text.x = element_text(size = 8),
axis.title.y = element_text(face = "bold"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
strip.text = element_blank(),
plot.margin = these_plot_margins
) +
ylab("Percent") +
xlab(NULL) +
### Stop text labels falling off the top
coord_cartesian(clip = "off")
})

demographic_plots_combined_annotated = demographic_plots_combined


top_row_combined_annotated <- plot_grid(
demographic_plots_combined_annotated$sex + theme(
legend.position = "none",
plot.margin = top_plot_margins
),
demographic_plots_combined_annotated$risk_status + theme(
legend.position = "none",
plot.margin = top_plot_margins
),
ncol = 2,
rel_widths = c(1, 1),
labels = c("Sex", "Risk status"),
label_size = 14,
label_x = 0.05
)

demographics_matrix_combined_annotated <- plot_grid(top_row_combined_annotated,
demographic_plots_combined_annotated$imd_categories + theme(legend.position = "none"),
demographic_plots_combined_annotated$ethnicity_6_groups + theme(legend.position = "none"),
demographic_plots_combined_annotated$ethnicity_16_groups + theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1)),
legend,
rel_heights = c(2, 2, 2, 4, 0.2),
ncol = 1,
labels = c("", "Index of multiple deprivation", "Ethnicity (broad categories)", "Ethnicity (detailed categories)", ""),
label_size = 14,
label_x = 0.05,
hjust = 0
)

save_plot(here::here("interim-outputs", "u16", "figures", "u16_publication_figure_labels.png"),
# save_plot("tmp.png",
demographics_matrix_combined_annotated,
base_width = 9, base_height = 10
)
9 changes: 9 additions & 0 deletions project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,12 @@ actions:
moderately_sensitive:
notebook: output/second_doses_u16.html
tables: output/machine_readable_outputs/second_doses/*_u16*.csv

generate_u16_publication_objects:
run: r:latest analysis/generate_u16_publication_objects.R
needs: [generate_u16_notebook]
outputs:
moderately_sensitive:
table: interim-outputs/u16/tables/u16_publication_table.csv
figure_option1: interim-outputs/u16/figures/u16_publication_figure.png
figure_option2: interim-outputs/u16/figures/u16_publication_figure_labels.png

0 comments on commit cbd2665

Please sign in to comment.