Merge pull request #52 from opensafely/update-15-feb-2022

Update 15 Feb 2022
opensafely · Feb 16, 2022 · ab4565f
2 parents 10cc2c8 + d1c186b
commit ab4565f
Show file tree

Hide file tree

Showing 84 changed files with 61,971 additions and 47,415 deletions.
diff --git a/lib/data_processing.py b/lib/data_processing.py
@@ -4,13 +4,14 @@
 import pandas as pd
 import numpy as np
 import os
+import re
 
 # Errors
 from errors import DataCleaningError
 
 
 
-def load_data(input_file='input_delivery.csv.gz', input_path="output"):
+def load_data(input_file='input_delivery.csv.gz', input_path="output", save_path={} ):
     """
     This reads in a csv that must be in output/ directory and cleans the
     data ready for use in the graphs and tables
@@ -137,9 +138,9 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
                     (df["LD"]==0) & (df["dementia"]==0), 1, 0))
 
     # Replace a region and STP with a value `0` with Unknown
-#     df = df.assign(
-#         region = df['region'].replace(0, "Unknown"),
-#         stp = df['stp'].replace(0, "Unknown"))
+    # df = df.assign(
+    #     region = df['region'].replace(0, "Unknown"),
+    #     stp = df['stp'].replace(0, "Unknown"))
 
     # Replace `I` or `U` for sex with `Other/Unknown`
     df = df.assign(
@@ -188,11 +189,21 @@ def load_data(input_file='input_delivery.csv.gz', input_path="output"):
 
 
     # get total population sizes and names for each STP
-    # stps = pd.read_csv(os.path.join("..","lib","stp_dict.csv"), usecols=["stp_id","name","list_size_o80"])
+    # stps = pd.read_csv(os.path.join("..","lib","stp_dict_total.csv"), usecols=["stp_id","name","total_list_size"])
     # df = df.merge(stps, left_on="stp", right_on="stp_id", how="left").rename(columns={"name":"stp_name"})
-
+
+    # missing_stps = set(stps['name']).difference( set( df['stp_name'] ) )
+    # dummy_regex = re.compile(r'^Dummy STP \d+$')
+    # missing_stps_final = [ele for ele in missing_stps if not dummy_regex.match(ele)]
+
+    # if save_path:
+    #     with open(os.path.join(save_path["text"], f"Missing_STPs.txt"), "w") as text_file:
+    #         text_file.write(f"{len(missing_stps_final)} STPs are not represented in our dataset.")
+    #         for this_missing_stp in missing_stps_final:
+    #             text_file.write(f"\n- {this_missing_stp}")
+
     # drop additional columns
-    df = df.drop(['care_home', 'age'], 1)  
+    df = df.drop(['care_home', 'age'], 1)
 
     return df
 

diff --git a/lib/report_results.py b/lib/report_results.py
@@ -21,7 +21,7 @@ def create_output_dirs(subfolder=None):
     """
     # create /assign directories for exporting figures and tables
     savepath = {}
-    for filetype in ["tables", "figures", "text"]:
+    for filetype in ["tables", "figures", "text", "objects"]:
         if subfolder:
             savepath[filetype] = os.path.abspath(os.path.join("..", "interim-outputs", subfolder, filetype))
         else: