Skip to content

Commit

Permalink
Merge pull request #39 from opensafely/update-oct-5
Browse files Browse the repository at this point in the history
Update oct 12
  • Loading branch information
HelenCEBM authored Oct 12, 2021
2 parents 1ae4254 + 4ab2b9c commit 588c2d5
Show file tree
Hide file tree
Showing 65 changed files with 36,281 additions and 35,766 deletions.
33 changes: 27 additions & 6 deletions analysis/study_definition_delivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from codelists import *
from study_definition_delivery_common import common_variables, campaign_start, index_date

# Specifiy study defeinition
# Specify study definition

study = StudyDefinition(
index_date = index_date,
Expand Down Expand Up @@ -221,7 +221,7 @@
"earliest": "2020-12-08", # first vaccine administered on the 8/12
"latest": index_date,
},
"incidence":0.5
"incidence":0.9
},
),
# SECOND DOSE COVID VACCINATION
Expand All @@ -236,9 +236,30 @@
"earliest": "2020-12-29", # first reported second dose administered on the 29/12
"latest": index_date,
},
"incidence": 0.3
"incidence": 0.8
},
),

# BOOSTER (3rd) DOSE COVID VACCINATION
## Booster dose schedule is 6 months from 2nd dose. However, for now, we will use an 8 week interval,
## to ensure that anyone having a third dose within the primary course (immunosuppressed, from 1st Sept)
## is not shown as due/missing a booster dose.
## However, those with third doses will also eventually become eligible for a booster, so this may need to be revisited.
covid_vacc_third_dose_date=patients.with_tpp_vaccination_record(
target_disease_matches="SARS-2 CORONAVIRUS",
on_or_after="covid_vacc_second_dose_date + 56 days",
find_first_match_in_period=True,
returning="date",
date_format="YYYY-MM-DD",
return_expectations={
"date": {
"earliest": "2021-09-24", # first booster dose recorded
"latest": index_date,
},
"incidence": 0.1
},
),

# COVID VACCINATION - Pfizer BioNTech
covid_vacc_pfizer_date=patients.with_tpp_vaccination_record(
product_name_matches="COVID-19 mRNA Vaccine Comirnaty 30micrograms/0.3ml dose conc for susp for inj MDV (Pfizer)",
Expand All @@ -250,7 +271,7 @@
"date": {
"earliest": "2020-12-08", # first vaccine administered on the 8/12
"latest": index_date,},
"incidence": 0.3
"incidence": 0.7
},
),
# COVID VACCINATION - Oxford AZ
Expand All @@ -265,7 +286,7 @@
"earliest": "2020-01-04", # first vaccine administered on the 4/1
"latest": index_date,
},
"incidence": 0.2
"incidence": 0.7
},
),

Expand All @@ -281,7 +302,7 @@
"earliest": "2020-04-01", # expected from early april
"latest": index_date,
},
"incidence": 0.1
"incidence": 0.4
},
),

Expand Down
84 changes: 40 additions & 44 deletions lib/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,56 +60,52 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
covid_vacc_flag_pfz = np.where(df["covid_vacc_pfizer_date"]!=0, 1, 0),
covid_vacc_flag_mod = np.where(df["covid_vacc_moderna_date"]!=0, 1, 0),
covid_vacc_2nd = np.where(df["covid_vacc_second_dose_date"]!=0, 1, 0),
covid_vacc_3rd = np.where(df["covid_vacc_third_dose_date"]!=0, 1, 0),
covid_vacc_bin = np.where(df["covid_vacc_date"]!=0, 1, 0))

# Create a single field for brand of first dose

# Create a single field for brand of first and second dose
# This excludes any uncertain cases where date of brand was too early or multiple brands were recorded
conditions = [( # pt has had an oxford vaccine, on or after the date this brand was first administered
# in UK (minus 1 day; if date is unfeasible, vaccine type may be incorrect):
df["covid_vacc_oxford_date"].astype(str)>="2020-01-03") & (
# oxford vaccine was on date of first dose:
df["covid_vacc_oxford_date"]==df["covid_vacc_date"]) & (
# oxford vaccine was not on same date as another brand:
df["covid_vacc_oxford_date"]!=df["covid_vacc_pfizer_date"]) & (
df["covid_vacc_oxford_date"]!=df["covid_vacc_moderna_date"]),
## repeat for pfizer and moderna:
(df["covid_vacc_pfizer_date"].astype(str)>="2020-12-07") & (
df["covid_vacc_pfizer_date"]==df["covid_vacc_date"]) & (
df["covid_vacc_pfizer_date"]!=df["covid_vacc_oxford_date"]) & (
df["covid_vacc_pfizer_date"]!=df["covid_vacc_moderna_date"]),
# moderna - only include if first dose date is after the date first administered in UK
(df["covid_vacc_moderna_date"].astype(str)>="2021-04-06") & (
df["covid_vacc_moderna_date"]==df["covid_vacc_date"]) & (
df["covid_vacc_moderna_date"]!=df["covid_vacc_oxford_date"]) & (
df["covid_vacc_moderna_date"]!=df["covid_vacc_pfizer_date"]),
## unknown type - pt has had first dose but the above conditions do not apply
# these may be unspecified brands or where two diff brands were recorded same day
df["covid_vacc_date"]!=0
]
choices = ["Oxford-AZ", "Pfizer", "Moderna", "Unknown"]

df['brand_of_first_dose'] = np.select(conditions, choices, default="none")

doses = {"first":"covid_vacc_date", "second":"covid_vacc_second_dose_date"}
for dose, field_name in doses.items():
conditions = [( # pt has had an oxford vaccine, on or after the date this brand was first administered
# in UK (minus 1 day; if date is unfeasible, vaccine type may be incorrect):
df["covid_vacc_oxford_date"].astype(str)>="2020-01-03") & (
# oxford vaccine was on date of selected dose:
df["covid_vacc_oxford_date"]==df[field_name]) & (
# oxford vaccine was not on same date as another brand:
df["covid_vacc_oxford_date"]!=df["covid_vacc_pfizer_date"]) & (
df["covid_vacc_oxford_date"]!=df["covid_vacc_moderna_date"]),
## repeat for pfizer and moderna:
(df["covid_vacc_pfizer_date"].astype(str)>="2020-12-07") & (
df["covid_vacc_pfizer_date"]==df[field_name]) & (
df["covid_vacc_pfizer_date"]!=df["covid_vacc_oxford_date"]) & (
df["covid_vacc_pfizer_date"]!=df["covid_vacc_moderna_date"]),
# moderna - only include if dose date is after the date first administered in UK
(df["covid_vacc_moderna_date"].astype(str)>="2021-04-06") & (
df["covid_vacc_moderna_date"]==df[field_name]) & (
df["covid_vacc_moderna_date"]!=df["covid_vacc_oxford_date"]) & (
df["covid_vacc_moderna_date"]!=df["covid_vacc_pfizer_date"]),
## unknown type - pt has had the dose but the above conditions do not apply
# these may be unspecified brands or where two diff brands were recorded same day
df[field_name]!=0
]

df[f'brand_of_{dose}_dose'] = np.select(conditions, choices, default="none")

# Mixed doses:
# flag patients with a second dose, where the two specified brands have been recorded on different dates
# (sometimes more than one brand is recorded on the same date)
# Also use the brand-of-first-dose field above to ensure the first dose was clear
# flag patients with different brands for the first and second dose
df = df.assign(
covid_vacc_ox_pfz = np.where((df["covid_vacc_2nd"]==1) & (
df['brand_of_first_dose'].isin(["Oxford-AZ", "Pfizer"])) & (
df["covid_vacc_flag_pfz"]==1) & (
df["covid_vacc_flag_ox"]==1) & (
df["covid_vacc_pfizer_date"] != df["covid_vacc_oxford_date"]), 1, 0),
covid_vacc_ox_mod = np.where((df["covid_vacc_2nd"]==1) & (
df['brand_of_first_dose'].isin(["Oxford-AZ", "Moderna"])) & (
df["covid_vacc_flag_ox"]==1) & (
df["covid_vacc_flag_mod"]==1) & (
df["covid_vacc_oxford_date"] != df["covid_vacc_moderna_date"]), 1, 0),
covid_vacc_mod_pfz = np.where((df["covid_vacc_2nd"]==1) & (
df['brand_of_first_dose'].isin(["Moderna", "Pfizer"])) & (
df["covid_vacc_flag_mod"]==1) & (
df["covid_vacc_flag_pfz"]==1) & (
df["covid_vacc_moderna_date"] != df["covid_vacc_pfizer_date"]), 1, 0),
covid_vacc_ox_pfz = np.where(
((df['brand_of_first_dose']=="Oxford-AZ") & (df['brand_of_second_dose']=="Pfizer")) | (
(df['brand_of_first_dose']=="Pfizer") & (df['brand_of_second_dose']=="Oxford-AZ")), 1, 0),
covid_vacc_ox_mod = np.where(
((df['brand_of_first_dose']=="Oxford-AZ") & (df['brand_of_second_dose']=="Moderna")) | (
(df['brand_of_first_dose']=="Moderna") & (df['brand_of_second_dose']=="Oxford-AZ")), 1, 0),
covid_vacc_mod_pfz = np.where(
((df['brand_of_first_dose']=="Moderna") & (df['brand_of_second_dose']=="Pfizer")) | (
(df['brand_of_first_dose']=="Pfizer") & (df["brand_of_second_dose"]=="Moderna")), 1, 0),
)

# declined - suppress if vaccine has been received
Expand Down
44 changes: 37 additions & 7 deletions notebooks/diffable_python/opensafely_vaccine_report_overall.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,13 @@

# ####
# ## Contents:
# - **<a href=#summary>Overview</a>**
# - **<a href=#summary>Overview</a>** (NEW: now includes third/booster dose counts)
# <br>
# <br>
# - **<a href=#summarychart>Summary Charts</a>**
# - **Trends in vaccination coverage** according to demographic/clinical features, for:
# <br>
# <br>
# - **Charts:** Trends in first dose vaccination coverage according to demographic/clinical features, for:
# - <a href=#charts80>80+ population</a>
# - <a href=#charts70>70-79 population</a>
# - <a href=#charts_shield>shielding (aged 16-69) population</a>
Expand All @@ -59,9 +63,19 @@
# - <a href=#charts40>40-49 population</a>
# <br>
# <br>
# - **<a href=#tables>Current vaccination coverage of each eligible population group, according to demographic/clinical features</a>**
# - Includes each of the groups above, plus <a href=#Cumulative-vaccination-figures-among-care-home-population>care home (65+)</a> and <a href=#Cumulative-vaccination-figures-among-Learning-Disabilities-(aged-16-64)-population>LD (aged 16-64)</a> populations.
# - **NEW** - tables now include <a href=#Cumulative-vaccination-figures-among-16-17-population>16-17</a> population
# - **Tables:** Current first dose vaccination coverage according to demographic/clinical features, for:
# - <a href=#Cumulative-vaccination-figures-among-80+-population>80+</a> population
# - <a href=#Cumulative-vaccination-figures-among-70-79-population>70-79</a> population
# - <a href=#Cumulative-vaccination-figures-among-care-home-population>care home (65+)</a> population
# - <a href=#Cumulative-vaccination-figures-among-shielding-(aged-16-69)-population>shielding (aged 16-69)</a> population
# - <a href=#Cumulative-vaccination-figures-among-65-69-population>65-69</a> population
# - <a href=#Cumulative-vaccination-figures-among-Learning-Disabilities-(aged-16-64)-population>LD (aged 16-64)</a> population
# - <a href=#Cumulative-vaccination-figures-among-60-64-population>60-64</a> population
# - <a href=#Cumulative-vaccination-figures-among-55-59-population>55-59</a> population
# - <a href=#Cumulative-vaccination-figures-among-50-54-population>50-54</a> population
# - <a href=#Cumulative-vaccination-figures-among-40-49-population>40-49</a> population
# - <a href=#Cumulative-vaccination-figures-among-18-39-population>18-39</a> population
# - <a href=#Cumulative-vaccination-figures-among-16-17-population>16-17</a> population
# <br>
# <br>
# - Appendix: <a href=#ethnicity>Proportion of each population group for whom ethnicity is known</a>
Expand All @@ -75,15 +89,18 @@
import json
summary_stats_1 = pd.read_csv(os.path.join("..", "interim-outputs","text", "summary_stats_first_dose.txt")).set_index("Unnamed: 0")
summary_stats_2 = pd.read_csv(os.path.join("..", "interim-outputs","text", "summary_stats_second_dose.txt")).set_index("Unnamed: 0")
summary_stats_3 = pd.read_csv(os.path.join("..", "interim-outputs","text", "summary_stats_third_dose.txt")).set_index("Unnamed: 0")
additional_stats = pd.read_csv(os.path.join("..", "interim-outputs","text", "additional_stats_first_dose.txt")).set_index("Unnamed: 0")

out = summary_stats_1.join(summary_stats_2)
out = out.join(summary_stats_3)
out.index = out.index.rename("Group")
display(out)


display(Markdown(f"##### \n"
"**NB** Patient counts are rounded to nearest 7\n"
"\nSecond doses are at least 19 days after the first; third doses at least 8 weeks after the second\n"
"##### \n" ))

display(Markdown("### Group definitions \n - The **care home** group is defined based on patients (aged 65+) having one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/).\n"
Expand All @@ -109,9 +126,9 @@
for x in additional_stats.index[3:7]:
display(Markdown(f"{x}: {additional_stats.loc[x][0]}\n"))

display(Markdown("<br>**Note:** mixed doses counts patients with two doses at least 19 days apart, \
display(Markdown("<br>**Note:** mixed doses counts patients with first and second doses at least 19 days apart, \
excluding patients with two different brands recorded on the same day \
or where the first dose was recorded on a date prior to when the given brand was available in the UK"))
or recorded on a date prior to when the given brand was available in the UK"))

for x in additional_stats.index[7:]:
display(Markdown(f"{x}: {additional_stats.loc[x][0]}\n"))
Expand Down Expand Up @@ -242,6 +259,19 @@

# #
# ## Vaccination rates of each eligible population group, according to demographic/clinical features <a name='tables' />
# - <a href=#Cumulative-vaccination-figures-among-80+-population>80+</a> population
# - <a href=#Cumulative-vaccination-figures-among-70-79-population>70-79</a> population
# - <a href=#Cumulative-vaccination-figures-among-care-home-population>care home (65+)</a> population
# - <a href=#Cumulative-vaccination-figures-among-shielding-(aged-16-69)-population>shielding (aged 16-69)</a> population
# - <a href=#Cumulative-vaccination-figures-among-65-69-population>65-69</a> population
# - <a href=#Cumulative-vaccination-figures-among-Learning-Disabilities-(aged-16-64)-population>LD (aged 16-64)</a> population
# - <a href=#Cumulative-vaccination-figures-among-60-64-population>60-64</a> population
# - <a href=#Cumulative-vaccination-figures-among-55-59-population>55-59</a> population
# - <a href=#Cumulative-vaccination-figures-among-50-54-population>50-54</a> population
# - <a href=#Cumulative-vaccination-figures-among-40-49-population>40-49</a> population
# - <a href=#Cumulative-vaccination-figures-among-18-39-population>18-39</a> population
# - <a href=#Cumulative-vaccination-figures-among-16-17-population>16-17</a> population
# <br>

# +
tablelist = find_and_sort_filenames("tables", by_demographics_or_population="population",
Expand Down
19 changes: 15 additions & 4 deletions notebooks/diffable_python/population_characteristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,16 @@
df_dict_cum = cumulative_sums(df, groups_of_interest=population_subgroups, features_dict=features_dict, latest_date=latest_date)

# +
# for details on second doses, no need for breakdowns of any groups (only "overall" figures will be included)
# for details on second/third doses, no need for breakdowns of any groups (only "overall" figures will be included)
second_dose_features = {}
for g in groups:
second_dose_features[g] = []

df_dict_cum_second_dose = cumulative_sums(df, groups_of_interest=population_subgroups, features_dict=second_dose_features,
latest_date=latest_date, reference_column_name="covid_vacc_second_dose_date")

df_dict_cum_third_dose = cumulative_sums(df, groups_of_interest=population_subgroups, features_dict=second_dose_features,
latest_date=latest_date, reference_column_name="covid_vacc_third_dose_date")
# -

# ### Cumulative vaccination figures - overall
Expand All @@ -150,21 +153,31 @@

summarised_data_dict = summarise_data_by_group(df_dict_cum, latest_date=latest_date, groups=groups)

# +
summarised_data_dict_2nd_dose = summarise_data_by_group(df_dict_cum_second_dose, latest_date=latest_date, groups=groups)

summarised_data_dict_3rd_dose = summarise_data_by_group(df_dict_cum_third_dose, latest_date=latest_date, groups=groups)
# -

# ### Proportion of each eligible population vaccinated to date

from report_results import create_summary_stats, create_detailed_summary_uptake

summ_stat_results, additional_stats = create_summary_stats(df, summarised_data_dict, formatted_latest_date, groups=groups,
savepath=savepath, suffix=suffix)

# +
summ_stat_results_2nd_dose, _ = create_summary_stats(df, summarised_data_dict_2nd_dose, formatted_latest_date,
groups=groups, savepath=savepath,
vaccine_type="second_dose", suffix=suffix)

summ_stat_results_3rd_dose, _ = create_summary_stats(df, summarised_data_dict_3rd_dose, formatted_latest_date,
groups=groups, savepath=savepath,
vaccine_type="third_dose", suffix=suffix)
# -

# display the results of the summary stats on first, second and third doses
display(pd.DataFrame(summ_stat_results).join(pd.DataFrame(summ_stat_results_2nd_dose)))
display(pd.DataFrame(summ_stat_results).join(pd.DataFrame(summ_stat_results_2nd_dose)).join(pd.DataFrame(summ_stat_results_3rd_dose)))
display(Markdown(f"*\n figures rounded to nearest 7"))

# +
Expand Down Expand Up @@ -242,11 +255,9 @@

second_dose_summarised_data_dict = summarise_data_by_group(df_dict_cum_second_dose, latest_date=latest_date, groups=groups)

# + collapsed=true jupyter={"outputs_hidden": true}
create_detailed_summary_uptake(second_dose_summarised_data_dict, formatted_latest_date,
groups=groups,
savepath=savepath, vaccine_type="second_dose")
# -

# ## For comparison look at first doses UP TO 14 WEEKS AGO
#
Expand Down
Loading

0 comments on commit 588c2d5

Please sign in to comment.