Merge pull request #41 from opensafely/thrid-doses-and-housebound

Third doses and housebound
opensafely · Nov 16, 2021 · b8111f5 · b8111f5
2 parents 61cfafd + fb2b115
commit b8111f5
Show file tree

Hide file tree

Showing 85 changed files with 60,957 additions and 40,446 deletions.
diff --git a/analysis/codelists.py b/analysis/codelists.py
@@ -3,15 +3,14 @@
     codelist_from_csv,
 )
 
-care_home_snomed_codes = codelist(
-    ['16073400','394923006', '160737007', '248171000000108', '1024771000000108', '224224003'], system="snomed")
+care_home_snomed_codes = codelist_from_csv(
+    "codelists/primis-covid19-vacc-uptake-longres.csv", system="snomed", column="code")
 
-high_risk_codes = codelist(
-    ['1300561000000107'], system="snomed")
-
-not_high_risk_codes = codelist(
-    ['1300591000000101', '1300571000000100'], system="snomed")
+high_risk_codes = codelist_from_csv(
+    "codelists/primis-covid19-vacc-uptake-shield.csv", system="snomed", column="code")
 
+not_high_risk_codes = codelist_from_csv(
+    "codelists/primis-covid19-vacc-uptake-nonshield.csv", system="snomed", column="code")
 
 adrenaline_pen = codelist_from_csv(
     "codelists/opensafely-adrenaline-pens.csv", system="snomed", column="dmd_id"
@@ -170,3 +169,11 @@
 covid_vacc_declined = codelist_from_csv(
     "codelists/primis-covid19-vacc-uptake-cov1decl.csv", system="snomed", column="code"
 )
+
+housebound_codes = codelist_from_csv(
+    "codelists/opensafely-housebound.csv", system="snomed", column="code"
+)
+
+no_longer_housebound_codes = codelist_from_csv(
+    "codelists/opensafely-no-longer-housebound.csv", system="snomed", column="code"
+)
diff --git a/analysis/study_definition_delivery.py b/analysis/study_definition_delivery.py
@@ -292,7 +292,7 @@
 
     # COVID VACCINATION - Moderna
     covid_vacc_moderna_date=patients.with_tpp_vaccination_record(
-        product_name_matches="COVID-19 mRNA (nucleoside modified) Vaccine Moderna 0.1mg/0.5mL dose dispersion for inj MDV",
+        product_name_matches="COVID-19 mRNA Vaccine Spikevax (nucleoside modified) 0.1mg/0.5mL dose disp for inj MDV (Moderna)",
         on_or_after="2020-12-01",  # check all december to date
         find_first_match_in_period=True,
         returning="date",

diff --git a/analysis/study_definition_delivery_common.py b/analysis/study_definition_delivery_common.py
@@ -333,6 +333,33 @@
             return_expectations={"incidence": 0.01,},
         ),
     ),
+
+    housebound = patients.satisfying(
+            """housebound_date
+                AND NOT no_longer_housebound
+                AND NOT moved_into_care_home""",
+        return_expectations={
+            "incidence": 0.01,
+                },
+
+        housebound_date=patients.with_these_clinical_events( 
+            housebound_codes, 
+            on_or_before=index_date,
+            find_last_match_in_period = True,
+            returning="date",
+            date_format="YYYY-MM-DD",
+        ),   
+        no_longer_housebound=patients.with_these_clinical_events( 
+            no_longer_housebound_codes, 
+            on_or_after="housebound_date",
+        ),
+        moved_into_care_home=patients.with_these_clinical_events(
+            care_home_snomed_codes,
+            on_or_after="housebound_date",
+        ),
+
+    ),
+
 
     LD = patients.with_these_clinical_events(
             wider_ld_codes, 

diff --git a/codelists/codelists.json b/codelists/codelists.json
@@ -177,14 +177,44 @@
     "primis-covid19-vacc-uptake-learndis.csv": {
       "id": "primis-covid19-vacc-uptake/learndis/v1",
       "url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/learndis/v1/",
-      "downloaded_at": "2021-03-09 11:58:49.613669Z",
+      "downloaded_at": "2021-03-05 18:16:49.633938Z",
       "sha": "41f81fcb9fadb082a77cadd6cf60614d7c8d860f"
     },
     "primis-covid19-vacc-uptake-cov1decl.csv": {
       "id": "primis-covid19-vacc-uptake/cov1decl/v1.1",
       "url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/cov1decl/v1.1/",
-      "downloaded_at": "2021-04-19 13:00:27.222055Z",
+      "downloaded_at": "2021-04-13 16:49:28.250760Z",
       "sha": "c8f6ef075bb2f267ee8524453967ba927703fe9d"
+    },
+    "opensafely-housebound.csv": {
+      "id": "opensafely/housebound/5bc77310",
+      "url": "https://codelists.opensafely.org/codelist/opensafely/housebound/5bc77310/",
+      "downloaded_at": "2021-11-08 14:57:57.273405Z",
+      "sha": "4408752ee525151741ce050f1a20c51cd5c0116a"
+    },
+    "opensafely-no-longer-housebound.csv": {
+      "id": "opensafely/no-longer-housebound/29a88ca6",
+      "url": "https://codelists.opensafely.org/codelist/opensafely/no-longer-housebound/29a88ca6/",
+      "downloaded_at": "2021-11-08 14:57:57.499598Z",
+      "sha": "cedcdcc0354885c1aa08224072718ff6bdae7450"
+    },
+    "primis-covid19-vacc-uptake-longres.csv": {
+      "id": "primis-covid19-vacc-uptake/longres/v1",
+      "url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/",
+      "downloaded_at": "2021-11-08 14:57:57.741756Z",
+      "sha": "4d4c94b9c6aa267c6a60657150bbc5ec7d2d7625"
+    },
+    "primis-covid19-vacc-uptake-shield.csv": {
+      "id": "primis-covid19-vacc-uptake/shield/v1",
+      "url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/shield/v1/",
+      "downloaded_at": "2021-11-08 14:57:57.925818Z",
+      "sha": "7edf08877d201478d4f794edb4d4dfe1353cb095"
+    },
+    "primis-covid19-vacc-uptake-nonshield.csv": {
+      "id": "primis-covid19-vacc-uptake/nonshield/v1",
+      "url": "https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/nonshield/v1/",
+      "downloaded_at": "2021-11-08 14:57:58.113991Z",
+      "sha": "fb1e019781d5a58c5512d71e9cd6cf47c05e72c5"
     }
   }
 }
diff --git a/codelists/codelists.txt b/codelists/codelists.txt
@@ -29,3 +29,8 @@ opensafely/permanent-immunosuppression/2020-06-02
 opensafely/temporary-immunosuppression/2020-04-24
 primis-covid19-vacc-uptake/learndis/v1
 primis-covid19-vacc-uptake/cov1decl/v1.1
+opensafely/housebound/5bc77310
+opensafely/no-longer-housebound/29a88ca6 
+primis-covid19-vacc-uptake/longres/v1
+primis-covid19-vacc-uptake/shield/v1
+primis-covid19-vacc-uptake/nonshield/v1
diff --git a/codelists/opensafely-housebound.csv b/codelists/opensafely-housebound.csv
@@ -0,0 +1,10 @@
+code,term
+138079003,Housebound
+160685001,Bed-ridden
+160689007,Housebound
+276041000000103,Temporarily housebound
+276051000000100,Temporarily housebound 
+276061000000102,Temporarily housebound 
+428415003,Temporarily housebound
+759311000000103,Housebound patient review
+759321000000109,Housebound patient review 
diff --git a/codelists/opensafely-no-longer-housebound.csv b/codelists/opensafely-no-longer-housebound.csv
@@ -0,0 +1,3 @@
+code,term
+760661000000106,No longer housebound
+760671000000104,No longer housebound 
diff --git a/codelists/primis-covid19-vacc-uptake-longres.csv b/codelists/primis-covid19-vacc-uptake-longres.csv
@@ -0,0 +1,7 @@
+code,term
+1024771000000108,Lives in hospice
+160734000,Lives in a nursing home
+160737007,Lives in an old peoples home
+224224003,Lives in staffed home
+248171000000108,Lives in care home
+394923006,Lives in a residential home
diff --git a/codelists/primis-covid19-vacc-uptake-nonshield.csv b/codelists/primis-covid19-vacc-uptake-nonshield.csv
@@ -0,0 +1,3 @@
+code,term
+1300571000000100,Moderate risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
+1300591000000101,Low risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
diff --git a/codelists/primis-covid19-vacc-uptake-shield.csv b/codelists/primis-covid19-vacc-uptake-shield.csv
@@ -0,0 +1,2 @@
+code,term
+1300561000000107,High risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection
diff --git a/lib/data_processing.py b/lib/data_processing.py
@@ -131,7 +131,7 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
     # categorise BMI into obese (i.e. BMI >=30) or non-obese (<30)
     df = df.assign(bmi = np.where((df["bmi"]=="Not obese"), "under 30", "30+"))
 
-    # drop unnecssary columns or columns created for processing 
+    # drop unnecessary columns or columns created for processing 
     df = df.drop(["imd","ethnicity_16", "ethnicity", 'ethnicity_6_sus',
        'ethnicity_16_sus', "has_follow_up"], 1)
 
@@ -166,7 +166,7 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
     for c in ["2nd_dose", "LD", "newly_shielded_since_feb_15", "dementia", 
           "chronic_cardiac_disease", "current_copd", "dialysis", "dmards","psychosis_schiz_bipolar",
          "solid_organ_transplantation", "chemo_or_radio", "intel_dis_incl_downs_syndrome","ssri",
-          "lung_cancer", "cancer_excl_lung_and_haem", "haematological_cancer"]:
+          "lung_cancer", "cancer_excl_lung_and_haem", "haematological_cancer", "housebound"]:
           df[c] = np.where(df[c]==1, "yes", "no")
 
 

diff --git a/lib/group_definitions.txt b/lib/group_definitions.txt
@@ -0,0 +1,7 @@
+### Group definitions
+- The **care home** group is defined based on patients (aged 65+) having one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/).
+- The **shielding** group is defined based on patients (aged 16-69) having one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/shield/v1/) provided it was not superseded by one of [these codes](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/nonshield/v1/).
+- The **LD** (learning disability) group is defined based on patients (aged 16-64) having one of [these](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/learndis/v1/) codes and excludes people who are shielding.
+- Patients are counted in their highest risk category only; e.g. a 65-year-old who is shielding is only counted in the shielding group, not in the 65-69.
+- The **housebound** group is defined based on [this](https://codelists.opensafely.org/codelist/opensafely/housebound/5bc77310/)
+codelist and excludes people who were later recorded as [not housebound](https://codelists.opensafely.org/codelist/opensafely/no-longer-housebound/29a88ca6/) or in a [care home](https://codelists.opensafely.org/codelist/primis-covid19-vacc-uptake/longres/v1/).
diff --git a/lib/report_results.py b/lib/report_results.py
@@ -199,7 +199,7 @@ def filtered_cumulative_sum(df, columns, latest_date, reference_column_name="cov
         out2.loc[max(out2.index)+1] = [latest_date, out2.loc[out2[reference_column_name]<latest_date]["overall"].max()]
 
     # suppress low numbers
-    out2["overall"] = out2["overall"].replace([1,2,3,4,5,6], 0).fillna(0).astype(int)
+    out2["overall"] = round7(out2["overall"].replace([1,2,3,4,5,6], 0).fillna(0).astype(int))
 
     # Rounds the overall_total values (and makes into integers)
     out2["overall_total"] = round7(total)    

diff --git a/lib/second_third_doses.py b/lib/second_third_doses.py
@@ -0,0 +1,194 @@
+'This produces summary tables and charts for second or third doses due/overdue.'
+
+
+import matplotlib.pyplot as plt
+
+from IPython.display import display, Markdown
+import os
+import pandas as pd
+import sys
+sys.path.append('../lib/')
+from create_report import import_table, show_table
+
+
+def second_third_doses(tablelist, tablelist_2nd, cohorts=None, *, dose_type="Second", time_period="14 weeks", latest_date_fmt, latest_date_fmt_2,
+                       max_ylim=12, 
+                       backend="expectations", suffix = "_tpp"):
+
+    """
+    Produce summary tables and charts for second or third doses due/overdue.
+
+    For each pair of tables (previous dose, dose of interest) the two are
+    joined, the doses given are compared against the doses due, the result
+    is exported to ``../output/<dose_type>_doses/<title><suffix>.csv`` and
+    the "overall" row is collected into a summary table. For the selected
+    cohorts a table and a grid of bar charts (overdue % by subgroup, with
+    rounding error bars) are then displayed.
+
+    INPUTS
+    tablelist (list): list of tables, each containing data for a single cohort on the "previous" dose (1st or 2nd)
+    tablelist_2nd (list): list of tables, each containing data for a single cohort on the dose of interest (2nd or 3rd);
+        paired positionally with `tablelist`
+    cohorts (list): cohorts to include e.g. ["80+", "70-79"]; a cohort is displayed when any of
+        these strings occurs in its table title. If empty/None, every cohort is displayed.
+    dose_type (str): "Second" or "Third"
+    time_period (str): E.g. "14 weeks" (only used in the caption under the charts)
+    latest_date_fmt (str): latest date of any vaccines, e.g. "3rd July 2020"; passed when importing the
+        dose-of-interest tables and used in the "Doses due at ..." column heading
+    latest_date_fmt_2 (str): formatted version of cut-off date up to which vaccines due were calculated;
+        passed when importing the previous-dose tables
+    max_ylim (int): max value for ymax (puts a limit on ymax to prevent chart axes being set by one rogue value).
+    backend (str): backend
+    suffix (str): backend string to append to filenames
+
+    OUTPUTS
+    A summary table and chart for each cohort, broken down into various subgroups
+    Also a summary table with one line per cohort.
+    Nothing is returned; results are displayed and written to CSV.
+
+    RAISES
+    ValueError: if `dose_type` is neither "Second" nor "Third".
+    """
+
+    # set up other variables needed:
+    # (explicit raise rather than `assert`, which is stripped under `python -O`)
+    previous_dose_lookup = {"Second": "first", "Third": "second"}
+    if dose_type not in previous_dose_lookup:
+        raise ValueError(f"unexpected dose_type: {dose_type}")
+    previous_dose = previous_dose_lookup[dose_type]
+
+    dose_file_name = f"{dose_type.lower()}_doses"
+
+    # column labels used repeatedly below
+    due_col = f"{dose_type} Doses due at {latest_date_fmt.replace(' 2021','')} (n)"
+    due_pct_col = f"{dose_type} Doses due (% of total)"
+    given_col = f"{dose_type} doses given (n)"
+    given_pct_col = f"{dose_type} doses given (% of due)"
+    overdue_col = f"{dose_type} doses overdue (n)"
+    overdue_pct_col = f"{dose_type} doses overdue (% of due)"
+
+    # collect the "overall" row for each cohort; turned into a DataFrame after the loop
+    # (DataFrame.append is not available in pandas 2.x)
+    summary_rows = []
+
+
+    for f, f2 in zip(tablelist, tablelist_2nd):
+
+        df, _ = import_table(f, latest_date_fmt=latest_date_fmt_2, show_carehomes=True, suffix=suffix, export_csv=False)
+        df = df.drop(columns=["Previous week's vaccination coverage (%)", "Total eligible", "Vaccinated over last 7d (%)"])
+
+        df2, title = import_table(f2, latest_date_fmt, show_carehomes=True, suffix=suffix, export_csv=False)
+        df2 = df2.drop(columns=["Previous week's vaccination coverage (%)", "Vaccinated over last 7d (%)"])
+
+        # column renaming and number formatting
+        for c in df2.columns:
+            if "(n)" in c:
+                df2[c] = pd.to_numeric(df2[c], downcast='integer')
+                df2 = df2.rename(columns={c: given_col})
+        for c in df.columns:
+            if "(n)" in c:
+                # the number of doses due is the number of previous doses given <time period> ago
+                df = df.rename(columns={c: due_col})
+
+        df = df2.join(df)
+
+        df = df.rename(columns={"Total eligible":"Total population"})
+
+        # only show tables where a significant proportion of the total population are due second dose
+        # NOTE(review): the value is a percentage, so 0.50 here means 0.5% - confirm this is the intended cut-off
+        df[due_pct_col] = 100*df[due_col]/df["Total population"]
+        if backend != "expectations" and (
+            df[due_pct_col][("overall","overall")] < 0.50):
+            continue
+        df = df.drop(columns=due_pct_col)
+
+        # calculate difference from expected
+        df[given_pct_col] = 100*(df[given_col]/df[due_col]).round(3)
+
+        df[overdue_col] = df[due_col] - df[given_col]
+
+        # column order
+        df = df[[due_col, overdue_col, given_col, given_pct_col, "Total population"]]
+
+        export_path = os.path.join("..", "output", dose_file_name)
+        os.makedirs(export_path, exist_ok=True)
+        df.to_csv(os.path.join(export_path, f"{title}{suffix}.csv"), index=True)
+
+
+        ######### create summary by extracting "overall" row
+        pop_overall = df.loc[("overall","overall")]
+        pop_overall = pop_overall.rename(title.replace(f"Cumulative {dose_type.lower()} dose vaccination figures among ","").replace(" population",""))
+        summary_rows.append(pop_overall)
+
+        # if a list of cohorts have been supplied, exit loop here for groups not in cohorts
+        if cohorts and not any(c in title for c in cohorts):
+            continue
+
+        display(Markdown("[Back to top](#Contents)"))
+
+        # add comma separators to numbers before displaying table
+        df_to_show = df.copy()
+        for c in [due_col, overdue_col, given_col, "Total population"]:
+            df_to_show[c] = df_to_show[c].apply('{:,}'.format)
+        show_table(df_to_show, title, latest_date_fmt, show_carehomes=True)
+
+        # added after the export/table display so it only feeds the charts
+        df[overdue_pct_col] = 100 - df[given_pct_col]
+
+
+        ######### plot charts
+
+        if " LD " in title:
+            title = title.replace("LD (aged 16-64) population", "people with learning disabilities (aged 16-64)")
+        display(Markdown(f"## \n ## {title.replace('Cumulative ','').replace(' vaccination figures', 's overdue').title()}"))
+
+        # NOTE(review): "brand of first dose" is hard-coded here, whereas the "Unknown" filter below
+        # uses f"brand of {previous_dose} dose"; for dose_type="Third" the two do not match - confirm
+        # which category name the third-dose tables actually contain.
+        cats_to_include = ["Age band", "Ethnicity (broad categories)", 
+                       "Index of Multiple Deprivation (quintiles)", "Dementia", 
+                       "Learning disability", "Psychosis, schizophrenia, or bipolar", 
+                        "brand of first dose"]
+        cats = [c for c in df.index.levels[0] if c in cats_to_include]
+        if not cats:
+            # nothing to chart for this cohort (plt.subplots cannot create a grid with zero rows)
+            continue
+        df = df.loc[cats]
+
+        # find errors based on rounding
+        # both num and denom are rounded to nearest 7 so both may be out by <=3 
+        df["pos_error"] = 100*3/(df[due_col]-3)
+        df["neg_error"] = 100*3/(df[due_col]+3)
+
+        # do not show in charts values representing less than 100 people
+        df.loc[df[due_col]<100, [overdue_pct_col,"neg_error","pos_error"]] = 0
+
+        # find ymax
+        ymax = df[overdue_pct_col].max()
+
+        # two charts per row, rounding up for an odd number of categories
+        rows_of_charts = (len(cats) + 1) // 2
+        fig, axs = plt.subplots(rows_of_charts, 2, figsize=(12, 4*rows_of_charts))
+
+        # unpack all the axes subplots
+        axes = axs.ravel()
+        # turn off axes until they are used
+        for ax in axes:
+            ax.set_axis_off()
+
+        # plot charts and display titles
+        for n, cat in enumerate(cats):
+            chart_title = f"{dose_type} doses overdue (% of those due)\n by "+ cat
+            dfp = df.copy().loc[cat]
+
+            # do not include "unknown" brand of previous dose (unless it's the only item in the index)
+            if cat == f"brand of {previous_dose} dose" and len(dfp.index) > 1:
+                dfp = dfp.loc[dfp.index!="Unknown"]
+
+            # plot chart
+            dfp[[overdue_pct_col]].plot.bar(title=chart_title, ax=axes[n], legend=False)
+            # add errorbars
+            axes[n].errorbar(dfp.index, dfp[overdue_pct_col], # same location as each bar
+                             yerr=[dfp["neg_error"], dfp["pos_error"]], #"First row contains the lower errors, the second row contains the upper errors."
+                             fmt="none", # no markers or connecting lines
+                             ecolor='k')
+            axes[n].set_axis_on()
+
+            axes[n].set_ylim([0, min(max_ylim, ymax*1.05)])
+            axes[n].set_ylabel(f"{dose_type} doses overdue (%)")
+            axes[n].set_xlabel(cat.title())
+
+            # reduce tick label sizes
+            if cat in ("Ethnicity (broad categories)", "Index of Multiple Deprivation (quintiles)"):
+                plt.setp(axes[n].get_xticklabels(), fontsize=8)
+        plt.subplots_adjust(hspace=1)
+
+        display(Markdown(f"{dose_type} doses which have not been given at least {time_period} since the {previous_dose} dose"),
+               Markdown("Error bars indicate possible error caused by rounding"))
+
+        plt.show()
+
+    # build the summary table: one row per cohort, labelled by the shortened table title
+    summary = pd.DataFrame(summary_rows)
+
+    # show summary table (first improve number formatting)
+    for c in summary:
+        if "(n)" in c or "Total population" in c:
+            summary[c] = summary[c].astype(int).apply('{:,}'.format)
+    display(Markdown("## \n # Summary"), summary)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		code,term
		1300561000000107,High risk category for developing complication from coronavirus disease 19 caused by severe acute respiratory syndrome coronavirus 2 infection