Skip to content

Commit

Permalink
Add Github Codespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewscolm committed Jul 3, 2024
1 parent beb6858 commit dddc0bf
Show file tree
Hide file tree
Showing 7 changed files with 246 additions and 4 deletions.
Binary file added .RData
Binary file not shown.
2 changes: 1 addition & 1 deletion .Rproj.user/23EE8BC9/pcs/source-pane.pper
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"activeTab": 3
"activeTab": 0
}
9 changes: 9 additions & 0 deletions .devcontainer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Devcontainer Configuration

This folder contains configuration files required for Codespaces/devcontainers.
Please do not modify its contents unless you know what you are doing and there
is a specific change to the devcontainer configuration you need to make.

If you believe you have a broken configuration or are unsure if you have the
latest version of the configuration, please follow the
[instructions to add codespaces to your project](https://docs.opensafely.org/getting-started/how-to/add-github-codespaces-to-your-project/).
55 changes: 55 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "OpenSAFELY",
"image": "ghcr.io/opensafely-core/research-template:v0",
// Features to add to the dev container. More info: https://containers.dev/features.
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {}
},
"postCreateCommand": "/bin/bash /opt/devcontainer/postCreate.sh ${containerWorkspaceFolder}",
"postAttachCommand": "/bin/bash /opt/devcontainer/postAttach.sh",
"forwardPorts": [
8787
],
"portsAttributes": {
"8787": {
"label": "RStudio IDE"
}
},
// Configure tool-specific properties.
"customizations": {
"codespaces": {
"repositories": {
"opensafely/server-instructions": {
"permissions": {
"contents": "read"
}
}
}
},
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"ms-toolsai.jupyter-renderers"
],
"settings": {
"extensions.ignoreRecommendations": true,
"files.autoSave": "afterDelay",
"files.autoSaveDelay": 1000,
"git.autofetch": true,
"python.analysis.extraPaths": [".devcontainer/ehrql-main/"],
"python.defaultInterpreterPath": "/opt/venv/bin/python",
"python.terminal.activateEnvInCurrentTerminal": true,
"python.terminal.activateEnvironment": true,
"window.autoDetectColorScheme": true
}
}
},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
"remoteEnv": {
"MAX_WORKERS": "2"
}
}
7 changes: 4 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ model.log
*/input.csv
__pycache__
.python-version
output/*
/output/*
metadata/*
venv/
.Rproj.user
*/Rhistory
.DS_Store
.Rhistory
.Rproj.user/
.devcontainer/ehrql-main/
*/test.ipynb
notebooks_jupyter/local_report_ethnicity.ipynb
133 changes: 133 additions & 0 deletions ONS_check.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Author: Colm D Andrews
# Date: 14/07/2022
#
################################################################################
library(tidyverse)
library(scales)
library(readr)
library(glue)
library(arrow)
library(ggplot2)
library(ggalluvial)

### Point 1. Why is there weirdness with lower N in the grouped compared to overall England


# Sum the Asian (new codelist, 5-group, filled) count over the rows whose
# `group` is "region", for comparison with the overall total noted below.
ethnicity <- here::here(
  "output",
  "released",
  "simple_patient_counts_categories_5_group_registered.csv"
) %>%
  read_csv(col_types = cols()) %>%
  filter(group == "region") %>%
  summarise(N = sum(Asian_ethnicity_new_5_filled))

# Adding all regions gives 1706020 compared to 1708430 total ethnicities for Asian New codelist



#######################################################

# Load the locally made 2021 ethnic group table (2001 categories) and move the
# listed cohort / Ethnic_Group levels to the front, in the order given.
ons_na_removed <- here::here(
  "output",
  "released",
  "made_locally",
  "ethnic_group_2021_registered_with_2001_categories.csv"
) %>%
  read_csv() %>%
  mutate(
    cohort = fct_relevel(cohort, "ONS", "new", "supplemented"),
    Ethnic_Group = fct_relevel(
      Ethnic_Group,
      "Asian", "Black", "Mixed", "White", "Other"
    )
  )

# Rebuild an "England" row per Ethnic_Group and cohort by summing N and Total
# over every row whose region is not "England".
ons_england <- ons_na_removed %>%
  filter(region != "England") %>%
  group_by(Ethnic_Group, cohort) %>%
  summarise(
    region = "England",
    N_sumregion = sum(N),
    Total_sumregion = sum(Total),
    # Same grouping summarise() would leave by default (grouped by
    # Ethnic_Group); stated explicitly so no regrouping message is emitted.
    .groups = "drop_last"
  )

# Compare the reported figures with the sum-of-regions figures.
# ons_england only holds region == "England", so the inner join keeps the
# England rows alone.
ons_na_removed_compare <- ons_na_removed %>%
  select("Ethnic_Group", "region", "cohort", "N", "Total") %>%
  # Explicit keys: these are the only columns the two tables share, so this is
  # the same join as the implicit natural join, without the "Joining with"
  # message and without silently changing if a shared column is added later.
  inner_join(ons_england, by = c("Ethnic_Group", "region", "cohort")) %>%
  mutate(
    N_diff = N - N_sumregion,
    total_diff = Total - Total_sumregion,
    perc_diff = N_diff / N * 100
  )
### some weirdness with lower N in the grouped* See point 1 above



# Reshape the sum-of-regions England rows into N / Total / percentage columns.
# This is a row-wise rename/derive, so transmute() is used instead of
# summarise(): returning several rows per group from summarise() is deprecated
# in dplyr 1.1.0. ungroup() first so Ethnic_Group can be listed like any other
# column.
ons_england_remix <- ons_england %>%
  ungroup() %>%
  transmute(
    region,
    Ethnic_Group,
    Total = Total_sumregion,
    N = N_sumregion,
    group = 5,
    cohort = cohort,
    percentage = N / Total * 100
  )

#### Is only White overrepresented in England?
# Percentage-point difference of each cohort from the first cohort within each
# Ethnic_Group/region; after arrange(cohort) the first row is the lowest cohort
# level present (ONS, given the level order set when the data were loaded).
# Stored under its own name: previously this was assigned to
# `ons_ethnicity_plot_na_diff`, which the next statement overwrites, so the
# result was lost before it could be inspected.
ons_england_diff <- ons_england_remix %>%
  group_by(Ethnic_Group, region) %>%
  arrange(cohort) %>%
  mutate(diff = percentage - first(percentage)) %>%
  select(region, Ethnic_Group, cohort, diff)


## One row per region for the White group, with N / Total / percentage spread
## into *_ONS and *_new columns (the ONS vs new difference is derived from
## these columns further down).
ons_ethnicity_plot_na_diff <- ons_na_removed %>%
  group_by(Ethnic_Group, region, group) %>%
  # Keep the cohort ordering so the ONS columns come before the new ones.
  arrange(cohort) %>%
  filter(
    Ethnic_Group == "White",
    cohort %in% c("new", "ONS")
  ) %>%
  # The former mutate(diff = ...) was dropped by this select() and never used,
  # so it has been removed. Grouping columns are listed explicitly instead of
  # relying on select() re-adding them.
  select(Ethnic_Group, group, region, N, Total, cohort, percentage) %>%
  pivot_wider(names_from = cohort, values_from = c(N, Total, percentage))

# step 2: check England is above ONS
# Pooled percentage over all non-England regions. Expressed in percent (x 100)
# so it is directly comparable with perc_ONS / perc_new in the `totals` check
# below, which also use percent.
step2 <- ons_ethnicity_plot_na_diff %>%
  filter(region != "England") %>%
  group_by(Ethnic_Group) %>%
  summarise(
    perc_ONS = sum(N_ONS) / sum(Total_ONS) * 100,
    perc_new = sum(N_new) / sum(Total_new) * 100
  )

## Why is it above
# Per region: the new-minus-ONS percentage difference, the sign of that
# difference, and the ratio of the new Total to the ONS Total.
coverage <- ons_ethnicity_plot_na_diff %>%
  filter(region != "England") %>%
  mutate(
    diff = percentage_new - percentage_ONS,
    sign = case_when(
      diff > 0 ~ "positive",
      diff < 0 ~ "negative",
      # a zero or missing difference has no sign
      TRUE ~ NA_character_
    ),
    coverage = Total_new / Total_ONS
  )

# Pool the regions by the sign of their difference. The percentages, diff and
# coverage are computed from the summed N / Total columns created earlier in
# the same summarise() call.
totals <- coverage %>%
  select(region, N_ONS, N_new, Total_ONS, Total_new, sign) %>%
  group_by(sign) %>%
  summarise(
    N_ONS = sum(N_ONS),
    Total_ONS = sum(Total_ONS),
    N_new = sum(N_new),
    Total_new = sum(Total_new),
    perc_ONS = N_ONS / Total_ONS * 100,
    perc_new = N_new / Total_new * 100,
    diff = perc_new - perc_ONS,
    coverage = Total_new / Total_ONS
  )

# does this agree with step2?
# Collapse the per-sign totals into a single row (constant group key) and
# print it, then print step2 for a side-by-side check.
totals %>%
  select(N_ONS, N_new, Total_ONS, Total_new) %>%
  mutate(group = 1) %>%
  group_by(group) %>%
  summarise(
    N_ONS = sum(N_ONS),
    Total_ONS = sum(Total_ONS),
    N_new = sum(N_new),
    Total_new = sum(Total_new),
    perc_ONS = N_ONS / Total_ONS * 100,
    perc_new = N_new / Total_new * 100
  )

step2

### Does ONS discrepancy widen as -> 100%
# Per-region difference between the new and ONS percentages.
ons_discrepancy <- ons_ethnicity_plot_na_diff %>%
  mutate(diff = percentage_new - percentage_ONS)
# View() needs an interactive session; guard it so the script can also be run
# non-interactively without failing on this line.
if (interactive()) {
  View(ons_discrepancy)
}




44 changes: 44 additions & 0 deletions analysis/local/local_manuscript_plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -1101,3 +1101,47 @@ ggsave(
)

View(df_sus_new_cross_perc_1)

#### practice distribution
# Read the released practice completeness summary and add an empty `name`
# column, used as the (blank) x position of the box plot below.
pratice_dist <- here::here(
  "output",
  "released",
  "practice_completeness.csv"
) %>%
  read_csv() %>%
  mutate(name = "")


# Horizontal box plot drawn from precomputed summary statistics
# (stat = "identity"): whiskers from p05 to p95, box from Q1 to Q3, line at
# the median.
pratice_dist_plot <- ggplot(
  pratice_dist,
  aes(
    x = name,
    ymin = p05,
    ymax = p95,
    lower = Q1,
    middle = median,
    upper = Q3
  )
) +
  # The fill is a fixed colour, so it is set on the geom rather than inside
  # aes(). Inside aes() the string was treated as a one-level variable and the
  # box was drawn in the default palette colour instead of #B24745FF.
  geom_boxplot(stat = "identity", fill = "#B24745FF") +
  coord_flip() +
  theme_minimal() +
  labs(y = "Completeness of ethnicity recording (%)", x = "") +
  theme(legend.position = "none") +
  theme(
    panel.background = element_rect(fill = "white")
  )

# Write the practice distribution plot to the locally made outputs folder as a
# 12 x 4 cm PNG at 600 dpi.
ggsave(
  filename = here::here("output", "released", "made_locally", "practice_distribution.png"),
  plot = pratice_dist_plot,
  dpi = 600,
  width = 12,
  height = 4,
  units = "cm"
)

# Relative difference (%) of the "new" cohort percentage from the ONS
# percentage, per region and Ethnic_Group; the supplemented cohort is excluded.
realtive_difference <- here::here(
  "output",
  "released",
  "made_locally",
  "ethnic_group_2021_registered_with_2001_categories.csv"
) %>%
  read_csv() %>%
  filter(cohort != "supplemented") %>%
  select(region, Ethnic_Group, cohort, percentage) %>%
  pivot_wider(names_from = cohort, values_from = percentage) %>%
  mutate(rd = (new / ONS * 100) - 100)

0 comments on commit dddc0bf

Please sign in to comment.