Skip to content

Commit

Permalink
Merge pull request #41 from opensafely/thrid-doses-and-housebound
Browse files Browse the repository at this point in the history
Third doses and housebound
  • Loading branch information
HelenCEBM authored Nov 16, 2021
2 parents 61cfafd + fb2b115 commit b8111f5
Show file tree
Hide file tree
Showing 85 changed files with 60,957 additions and 40,446 deletions.
21 changes: 14 additions & 7 deletions analysis/codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
codelist_from_csv,
)

care_home_snomed_codes = codelist(
['16073400','394923006', '160737007', '248171000000108', '1024771000000108', '224224003'], system="snomed")
care_home_snomed_codes = codelist_from_csv(
"codelists/primis-covid19-vacc-uptake-longres.csv", system="snomed", column="code")

high_risk_codes = codelist(
['1300561000000107'], system="snomed")

not_high_risk_codes = codelist(
['1300591000000101', '1300571000000100'], system="snomed")
high_risk_codes = codelist_from_csv(
"codelists/primis-covid19-vacc-uptake-shield.csv", system="snomed", column="code")

not_high_risk_codes = codelist_from_csv(
"codelists/primis-covid19-vacc-uptake-nonshield.csv", system="snomed", column="code")

adrenaline_pen = codelist_from_csv(
"codelists/opensafely-adrenaline-pens.csv", system="snomed", column="dmd_id"
Expand Down Expand Up @@ -170,3 +169,11 @@
covid_vacc_declined = codelist_from_csv(
"codelists/primis-covid19-vacc-uptake-cov1decl.csv", system="snomed", column="code"
)

# SNOMED codes marking a patient as housebound, read from the "code" column
# of the opensafely/housebound codelist CSV.
housebound_codes = codelist_from_csv(
"codelists/opensafely-housebound.csv", system="snomed", column="code"
)

# SNOMED codes marking a patient as no longer housebound, read from the "code"
# column of the opensafely/no-longer-housebound codelist CSV.
no_longer_housebound_codes = codelist_from_csv(
"codelists/opensafely-no-longer-housebound.csv", system="snomed", column="code"
)
2 changes: 1 addition & 1 deletion analysis/study_definition_delivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@

# COVID VACCINATION - Moderna
covid_vacc_moderna_date=patients.with_tpp_vaccination_record(
product_name_matches="COVID-19 mRNA (nucleoside modified) Vaccine Moderna 0.1mg/0.5mL dose dispersion for inj MDV",
product_name_matches="COVID-19 mRNA Vaccine Spikevax (nucleoside modified) 0.1mg/0.5mL dose disp for inj MDV (Moderna)",
on_or_after="2020-12-01", # check all december to date
find_first_match_in_period=True,
returning="date",
Expand Down
27 changes: 27 additions & 0 deletions analysis/study_definition_delivery_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,33 @@
return_expectations={"incidence": 0.01,},
),
),

housebound = patients.satisfying(
"""housebound_date
AND NOT no_longer_housebound
AND NOT moved_into_care_home""",
return_expectations={
"incidence": 0.01,
},

housebound_date=patients.with_these_clinical_events(
housebound_codes,
on_or_before=index_date,
find_last_match_in_period = True,
returning="date",
date_format="YYYY-MM-DD",
),
no_longer_housebound=patients.with_these_clinical_events(
no_longer_housebound_codes,
on_or_after="housebound_date",
),
moved_into_care_home=patients.with_these_clinical_events(
care_home_snomed_codes,
on_or_after="housebound_date",
),

),


LD = patients.with_these_clinical_events(
wider_ld_codes,
Expand Down
34 changes: 32 additions & 2 deletions codelists/codelists.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,44 @@
"primis-covid19-vacc-uptake-learndis.csv": {
"id": "primis-covid19-vacc-uptake/learndis/v1",
"url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/learndis/v1/",
"downloaded_at": "2021-03-09 11:58:49.613669Z",
"downloaded_at": "2021-03-05 18:16:49.633938Z",
"sha": "41f81fcb9fadb082a77cadd6cf60614d7c8d860f"
},
"primis-covid19-vacc-uptake-cov1decl.csv": {
"id": "primis-covid19-vacc-uptake/cov1decl/v1.1",
"url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/cov1decl/v1.1/",
"downloaded_at": "2021-04-19 13:00:27.222055Z",
"downloaded_at": "2021-04-13 16:49:28.250760Z",
"sha": "c8f6ef075bb2f267ee8524453967ba927703fe9d"
},
"opensafely-housebound.csv": {
"id": "opensafely/housebound/5bc77310",
"url": "https://codelists.opensafely.org/codelist/opensafely/housebound/5bc77310/",
"downloaded_at": "2021-11-08 14:57:57.273405Z",
"sha": "4408752ee525151741ce050f1a20c51cd5c0116a"
},
"opensafely-no-longer-housebound.csv": {
"id": "opensafely/no-longer-housebound/29a88ca6",
"url": "https://codelists.opensafely.org/codelist/opensafely/no-longer-housebound/29a88ca6/",
"downloaded_at": "2021-11-08 14:57:57.499598Z",
"sha": "cedcdcc0354885c1aa08224072718ff6bdae7450"
},
"primis-covid19-vacc-uptake-longres.csv": {
"id": "primis-covid19-vacc-uptake/longres/v1",
"url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/",
"downloaded_at": "2021-11-08 14:57:57.741756Z",
"sha": "4d4c94b9c6aa267c6a60657150bbc5ec7d2d7625"
},
"primis-covid19-vacc-uptake-shield.csv": {
"id": "primis-covid19-vacc-uptake/shield/v1",
"url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/shield/v1/",
"downloaded_at": "2021-11-08 14:57:57.925818Z",
"sha": "7edf08877d201478d4f794edb4d4dfe1353cb095"
},
"primis-covid19-vacc-uptake-nonshield.csv": {
"id": "primis-covid19-vacc-uptake/nonshield/v1",
"url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/nonshield/v1/",
"downloaded_at": "2021-11-08 14:57:58.113991Z",
"sha": "fb1e019781d5a58c5512d71e9cd6cf47c05e72c5"
}
}
}
5 changes: 5 additions & 0 deletions codelists/codelists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@ opensafely/permanent-immunosuppression/2020-06-02
opensafely/temporary-immunosuppression/2020-04-24
primis-covid19-vacc-uptake/learndis/v1
primis-covid19-vacc-uptake/cov1decl/v1.1
opensafely/housebound/5bc77310
opensafely/no-longer-housebound/29a88ca6
primis-covid19-vacc-uptake/longres/v1
primis-covid19-vacc-uptake/shield/v1
primis-covid19-vacc-uptake/nonshield/v1
10 changes: 10 additions & 0 deletions codelists/opensafely-housebound.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
code,term
138079003,Housebound
160685001,Bed-ridden
160689007,Housebound
276041000000103,Temporarily housebound
276051000000100,Temporarily housebound
276061000000102,Temporarily housebound
428415003,Temporarily housebound
759311000000103,Housebound patient review
759321000000109,Housebound patient review
3 changes: 3 additions & 0 deletions codelists/opensafely-no-longer-housebound.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
code,term
760661000000106,No longer housebound
760671000000104,No longer housebound
7 changes: 7 additions & 0 deletions codelists/primis-covid19-vacc-uptake-longres.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
code,term
1024771000000108,Lives in hospice
160734000,Lives in a nursing home
160737007,Lives in an old peoples home
224224003,Lives in staffed home
248171000000108,Lives in care home
394923006,Lives in a residential home
3 changes: 3 additions & 0 deletions codelists/primis-covid19-vacc-uptake-nonshield.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
code,term
1300571000000100,Moderate risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
1300591000000101,Low risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
2 changes: 2 additions & 0 deletions codelists/primis-covid19-vacc-uptake-shield.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
code,term
1300561000000107,High risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
4 changes: 2 additions & 2 deletions lib/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
# categorise BMI into obese (i.e. BMI >=30) or non-obese (<30)
df = df.assign(bmi = np.where((df["bmi"]=="Not obese"), "under 30", "30+"))

# drop unnecssary columns or columns created for processing
# drop unnecessary columns or columns created for processing
df = df.drop(["imd","ethnicity_16", "ethnicity", 'ethnicity_6_sus',
'ethnicity_16_sus', "has_follow_up"], 1)

Expand Down Expand Up @@ -166,7 +166,7 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
for c in ["2nd_dose", "LD", "newly_shielded_since_feb_15", "dementia",
"chronic_cardiac_disease", "current_copd", "dialysis", "dmards","psychosis_schiz_bipolar",
"solid_organ_transplantation", "chemo_or_radio", "intel_dis_incl_downs_syndrome","ssri",
"lung_cancer", "cancer_excl_lung_and_haem", "haematological_cancer"]:
"lung_cancer", "cancer_excl_lung_and_haem", "haematological_cancer", "housebound"]:
df[c] = np.where(df[c]==1, "yes", "no")


Expand Down
7 changes: 7 additions & 0 deletions lib/group_definitions.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
### Group definitions
- The **care home** group is defined based on patients (aged 65+) having one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/).
- The **shielding** group is defined based on patients (aged 16-69) having one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/shield/v1/) provided it was not superseded by one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/nonshield/v1/).
- The **LD** (learning disability) group is defined based on patients (aged 16-64) having one of [these](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/learndis/v1/) codes and excludes people who are shielding.
- Patients are counted in their highest risk category only; e.g. a 65-year-old who is shielding is only counted in the shielding group, not in the 65-69.
- The **housebound** group is defined based on [this](https://codelists.opensafely.org/codelist/opensafely/housebound/5bc77310/)
codelist and excludes people who were later recorded as [not housebound](https://codelists.opensafely.org/codelist/opensafely/no-longer-housebound/29a88ca6/) or in a [care home](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/).
2 changes: 1 addition & 1 deletion lib/report_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def filtered_cumulative_sum(df, columns, latest_date, reference_column_name="cov
out2.loc[max(out2.index)+1] = [latest_date, out2.loc[out2[reference_column_name]<latest_date]["overall"].max()]

# suppress low numbers
out2["overall"] = out2["overall"].replace([1,2,3,4,5,6], 0).fillna(0).astype(int)
out2["overall"] = round7(out2["overall"].replace([1,2,3,4,5,6], 0).fillna(0).astype(int))

# Rounds the overall_total values (and makes into integers)
out2["overall_total"] = round7(total)
Expand Down
194 changes: 194 additions & 0 deletions lib/second_third_doses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
'This produces summary tables and charts for second or third doses due/overdue.'


import matplotlib.pyplot as plt

from IPython.display import display, Markdown
import os
import pandas as pd
import sys
sys.path.append('../lib/')
from create_report import import_table, show_table


def second_third_doses(tablelist, tablelist_2nd, cohorts=None, *, dose_type="Second", time_period="14 weeks",
                       latest_date_fmt, latest_date_fmt_2,
                       max_ylim=12,
                       backend="expectations", suffix="_tpp"):
    '''
    This produces summary tables and charts for second or third doses due/overdue.

    For each pair of tables the "previous" dose counts are treated as the number
    of doses now due, and are compared with the counts of the dose of interest
    actually given. One CSV per cohort is written to ../output/<dose>_doses/.

    INPUTS
    tablelist (list): list of tables, each containing data for a single cohort on the "previous" dose (1st or 2nd)
    tablelist_2nd (list): list of tables, each containing data for a single cohort on the dose of interest (2nd or 3rd)
    cohorts (list): cohorts to display tables/charts for, e.g. ["80+", "70-79"];
                    matched as substrings of each table title. If None/empty, all are shown.
                    (All cohorts are exported and included in the summary regardless.)
    dose_type (str): "Second" or "Third"
    time_period (str): E.g. "14 weeks" (only used in the text displayed under the charts)
    latest_date_fmt (str): latest date of any vaccines, e.g. "3rd July 2020"; used to load the
                    dose-of-interest tables and to label the "doses due at ..." column
    latest_date_fmt_2 (str): formatted version of cut-off date up to which vaccines due were
                    calculated; used to load the previous-dose tables
    max_ylim (int): max value for ymax (puts a limit on ymax to prevent chart axes being set by one rogue value).
    backend (str): backend; for any value other than "expectations", cohorts where fewer than
                    0.5% of the total population are due a dose are skipped entirely
    suffix (str): backend string to append to filenames

    OUTPUTS
    A summary table and chart for each cohort, broken down into various subgroups
    Also a summary table with one line per cohort.

    RAISES
    ValueError: if dose_type is neither "Second" nor "Third"
    '''

    # set up other variables needed:
    if dose_type == "Second":
        previous_dose = "first"
    elif dose_type == "Third":
        previous_dose = "second"
    else:
        # raise rather than assert: asserts are stripped when running with -O
        raise ValueError(f"unexpected dose_type: {dose_type}")

    dose_file_name = f"{dose_type.lower()}_doses"

    # column labels that are reused throughout
    # NOTE(review): the year stripped from the label is hard-coded to 2021
    due_col = f"{dose_type} Doses due at {latest_date_fmt.replace(' 2021','')} (n)"
    given_col = f"{dose_type} doses given (n)"
    overdue_col = f"{dose_type} doses overdue (n)"
    given_pct_col = f"{dose_type} doses given (% of due)"
    overdue_pct_col = f"{dose_type} doses overdue (% of due)"

    export_path = os.path.join("..", "output", dose_file_name)

    # "overall" row for each cohort; assembled into one summary table at the end
    summary_rows = []

    for f, f2 in zip(tablelist, tablelist_2nd):

        df, _ = import_table(f, latest_date_fmt=latest_date_fmt_2, show_carehomes=True, suffix=suffix, export_csv=False)
        df = df.drop(["Previous week's vaccination coverage (%)", "Total eligible", "Vaccinated over last 7d (%)"], axis=1)

        df2, title = import_table(f2, latest_date_fmt, show_carehomes=True, suffix=suffix, export_csv=False)
        df2 = df2.drop(["Previous week's vaccination coverage (%)", "Vaccinated over last 7d (%)"], axis=1)

        # column renaming and number formatting
        for c in df2.columns:
            if "(n)" in c:
                df2[c] = pd.to_numeric(df2[c], downcast='integer')
                df2 = df2.rename(columns={c: given_col})
        for c in df.columns:
            if "(n)" in c:
                # the number of doses due is the number of previous doses given <time period> ago
                df = df.rename(columns={c: due_col})

        df = df2.join(df)

        df = df.rename(columns={"Total eligible": "Total population"})

        # only show tables where a significant proportion of the total population are due a dose
        due_pct_of_total = 100 * df[due_col] / df["Total population"]
        if backend != "expectations" and due_pct_of_total[("overall", "overall")] < 0.50:
            continue

        # calculate difference from expected
        df[given_pct_col] = 100 * (df[given_col] / df[due_col]).round(3)
        df[overdue_col] = df[due_col] - df[given_col]

        # column order
        df = df[[due_col, overdue_col, given_col, given_pct_col, "Total population"]]

        os.makedirs(export_path, exist_ok=True)
        df.to_csv(os.path.join(export_path, f"{title}{suffix}.csv"), index=True)

        ######### create summary by extracting "overall" row
        pop_overall = df.loc[("overall", "overall")]
        pop_overall = pop_overall.rename(
            title.replace(f"Cumulative {dose_type.lower()} dose vaccination figures among ", "").replace(" population", ""))
        summary_rows.append(pop_overall)

        # if a list of cohorts have been supplied, exit loop here for groups not in cohorts
        if cohorts and not any(c in title for c in cohorts):
            continue

        display(Markdown("[Back to top](#Contents)"))

        # add comma separators to numbers before displaying table
        df_to_show = df.copy()
        for c in [due_col, overdue_col, given_col, "Total population"]:
            df_to_show[c] = df_to_show[c].apply('{:,}'.format)
        show_table(df_to_show, title, latest_date_fmt, show_carehomes=True)

        df[overdue_pct_col] = 100 - df[given_pct_col]

        ######### plot charts

        if " LD " in title:
            title = title.replace("LD (aged 16-64) population", "people with learning disabilities (aged 16-64)")
        display(Markdown(f"## \n ## {title.replace('Cumulative ','').replace(' vaccination figures', 's overdue').title()}"))

        # NOTE(review): "brand of first dose" is hard-coded here, while the chart
        # code filters "Unknown" from f"brand of {previous_dose} dose"; for third
        # doses these differ - confirm which category label the tables contain.
        cats_to_include = ["Age band", "Ethnicity (broad categories)",
                           "Index of Multiple Deprivation (quintiles)", "Dementia",
                           "Learning disability", "Psychosis, schizophrenia, or bipolar",
                           "brand of first dose"]
        cats = [c for c in df.index.levels[0] if c in cats_to_include]
        if not cats:
            # nothing to chart (plt.subplots cannot create a grid with zero rows)
            continue
        df = df.loc[cats]

        # find errors based on rounding
        # both num and denom are rounded to nearest 7 so both may be out by <=3
        df["pos_error"] = 100 * 3 / (df[due_col] - 3)
        df["neg_error"] = 100 * 3 / (df[due_col] + 3)

        # do not show in charts values representing less than 100 people
        df.loc[df[due_col] < 100, [overdue_pct_col, "neg_error", "pos_error"]] = 0

        _plot_overdue_charts(df, cats, dose_type=dose_type, previous_dose=previous_dose, max_ylim=max_ylim)

        display(Markdown(f"{dose_type} doses which have not been given at least {time_period} since the {previous_dose} dose"),
                Markdown("Error bars indicate possible error caused by rounding"))

        plt.show()

    # show summary table (first improve number formatting)
    # building the frame once from the collected rows avoids DataFrame.append,
    # which was removed in pandas 2.0
    summary = pd.DataFrame(summary_rows)
    for c in summary:
        if "(n)" in c or "Total population" in c:
            summary[c] = summary[c].astype(int).apply('{:,}'.format)
    display(Markdown(f"## \n # Summary"), summary)


def _plot_overdue_charts(df, cats, *, dose_type, previous_dose, max_ylim):
    '''Draw a two-column grid of bar charts (one per category in cats) of % doses overdue, with error bars.'''
    overdue_pct_col = f"{dose_type} doses overdue (% of due)"

    # find ymax (shared by every chart so they are directly comparable)
    ymax = df[overdue_pct_col].max()

    # two charts per row, rounding up for an odd number of categories
    rows_of_charts = (len(cats) + 1) // 2
    fig, axs = plt.subplots(rows_of_charts, 2, figsize=(12, 4 * rows_of_charts))

    # unpack all the axes subplots
    axes = axs.ravel()
    # turn off axes until they are used
    for ax in axes:
        ax.set_axis_off()

    # plot charts and display titles
    for n, cat in enumerate(cats):
        chart_title = f"{dose_type} doses overdue (% of those due)\n by " + cat
        dfp = df.loc[cat]

        # do not include "unknown" brand of previous dose (unless it's the only item in the index)
        if cat == f"brand of {previous_dose} dose" and len(dfp.index) > 1:
            dfp = dfp.loc[dfp.index != "Unknown"]

        # plot chart
        dfp[[overdue_pct_col]].plot.bar(title=chart_title, ax=axes[n], legend=False)
        # add errorbars
        axes[n].errorbar(dfp.index, dfp[overdue_pct_col],  # same location as each bar
                         yerr=[dfp["neg_error"], dfp["pos_error"]],  # first row: lower errors; second row: upper errors
                         fmt="none",  # no markers or connecting lines
                         ecolor='k')
        axes[n].set_axis_on()

        axes[n].set_ylim([0, min(max_ylim, ymax * 1.05)])
        axes[n].set_ylabel(f"{dose_type} doses overdue (%)")
        axes[n].set_xlabel(cat.title())

        # reduce tick label sizes
        if cat in ("Ethnicity (broad categories)", "Index of Multiple Deprivation (quintiles)"):
            plt.setp(axes[n].get_xticklabels(), fontsize=8)
    plt.subplots_adjust(hspace=1)
Loading

0 comments on commit b8111f5

Please sign in to comment.