generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcohortcovid.R
82 lines (51 loc) · 2.51 KB
/
cohortcovid.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Summarise COVID-19 exposure across the prevax, vax and unvax cohorts:
# for each cohort, count patients with a confirmed COVID-19 exposure date
# inside their follow-up window and total the associated person-days, then
# write the raw and the rounded counts to CSV under output/.
# Load packages ----
# magrittr supplies the `%>%` pipe used throughout this script.
library(magrittr)
# Specify redaction threshold ----
# Passed as the `to` argument of roundmid_any() when the counts are redacted.
threshold <- 6
# Source common functions ----
# NOTE(review): roundmid_any() is not defined in this file; presumably it
# comes from analysis/utility.R - confirm.
source("analysis/utility.R")
# Derive persondays ----
# For every cohort, read its stage 1 dataset and reduce it to three columns:
# patient_id, <cohort>_covid19 and <cohort>_persondays. The result is stored
# in a variable named after the cohort (prevax, vax, unvax).
required_cols <- c(
  "patient_id",
  "index_date",
  "exp_date_covid19_confirmed",
  "end_date_exposure"
)

for (cohort in c("prevax", "vax", "unvax")) {

  # Load data ----
  # Every cohort except "vax" is read from a file carrying the "_extf" suffix.
  file_suffix <- if (cohort == "vax") "" else "_extf"
  stage1_path <- paste0("output/input_", cohort, file_suffix, "_stage1.rds")
  cohort_data <- dplyr::as_tibble(readr::read_rds(stage1_path))

  # Restrict to required variables for dataset preparation ----
  cohort_data <- cohort_data[, required_cols]

  cohort_data <- cohort_data %>%
    dplyr::rename(exp_date = exp_date_covid19_confirmed) %>%
    dplyr::mutate(
      # Remove exposures outside of follow-up time ----
      # `%in% TRUE` treats an NA comparison as "not outside", so only dates
      # known to fall before index_date or after end_date_exposure are blanked.
      exp_date = replace(
        exp_date,
        (exp_date > end_date_exposure | exp_date < index_date) %in% TRUE,
        NA
      ),
      # Derive covid19 and persondays variables ----
      covid19 = !is.na(exp_date),
      # Days from index_date to the exposure date, counting both end days.
      # NOTE(review): this is NA whenever exp_date is NA, i.e. for patients
      # with no retained exposure - confirm that is intended rather than
      # following those patients up to end_date_exposure.
      persondays = as.numeric(exp_date - index_date) + 1
    ) %>%
    # Restrict variables ----
    dplyr::select(patient_id, covid19, persondays)

  # Prefix the two derived columns with the cohort name.
  names(cohort_data)[-1] <- paste0(cohort, "_", names(cohort_data)[-1])

  # Name data ----
  assign(cohort, cohort_data)
}
# Merge dataframe ----
# Fold the three cohort tables together with successive left joins on
# patient_id, starting from prevax. Because all.x = TRUE, every prevax row is
# kept and patient_ids found only in vax or unvax are not carried over.
# NOTE(review): confirm that restricting vax/unvax to prevax patients is
# intended before the per-cohort totals are taken.
df <- Reduce(
  function(left, right) {
    merge(left, right, by = "patient_id", all.x = TRUE)
  },
  list(prevax, vax, unvax)
)
# Aggregate infections ----
# Collapse the merged patient-level table to one total per cohort and measure,
# then reshape to one row per cohort with columns cohort, covid19, persondays.
#
# summarise() yields the single row of totals directly. The previous approach
# copied each total onto every patient row with mutate() and then collapsed
# the copies with unique(), which does needless work and leaves nothing to
# reshape when the merged table has no rows.
total_cols <- paste0(
  rep(c("prevax", "vax", "unvax"), each = 2),
  "_",
  c("covid19", "persondays")
)
df <- df %>%
  dplyr::summarise(
    dplyr::across(
      dplyr::all_of(total_cols),
      # Missing values (patients absent from a cohort, or with no derived
      # person-days) are ignored in the totals.
      function(x) sum(x, na.rm = TRUE)
    )
  )
# Column names are "<cohort>_<measure>"; split them at the underscore so that
# the cohort becomes a column and each measure becomes its own column.
df <- tidyr::pivot_longer(
  df,
  cols = dplyr::all_of(total_cols),
  names_sep = "_",
  names_to = c("cohort", "value1")
)
df <- tidyr::pivot_wider(df, names_from = "value1")
# Perform redaction ----
# Add a rounded copy of the infection counts. roundmid_any() is not defined in
# this file; it receives the counts and the threshold as `to`.
df[["covid19_midpoint6"]] <- roundmid_any(
  as.numeric(df[["covid19"]]),
  to = threshold
)

# Save output ----
# One CSV per count column: the raw counts and the rounded counts, each
# alongside the cohort label and the person-days total.
output_paths <- c(
  covid19 = "output/cohortcovid.csv",
  covid19_midpoint6 = "output/cohortcovid_midpoint6.csv"
)
for (count_col in names(output_paths)) {
  write.csv(
    df[, c("cohort", count_col, "persondays")],
    output_paths[[count_col]],
    row.names = FALSE
  )
}