-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWorkReportingDelays.py
147 lines (109 loc) · 4.23 KB
/
WorkReportingDelays.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 30 09:03:15 2020
@author: Matt Bayer
"""
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import plotly.express as px
import covid
#%% Load data
# Column names from the raw data files mapped to the shorter names used in
# the rest of this script.
col_rename = {
    'Date': 'Date',
    'POS_NEW': 'Cases',
    'TEST_NEW': 'Tests',
    'DTH_NEW': 'Deaths',
    'HOSP_NEW': 'Hospitalizations',
}

# Statewide data, renamed right after loading.
state = covid.read_covid_data_wi('state').rename(columns=col_rename)

# County-level data; keep only the Milwaukee rows and rename those.
county = covid.read_covid_data_wi('county')
mke = county.loc[county['NAME'] == 'Milwaukee'].rename(columns=col_rename)
#%% Read in all death-by-date data
# Build one DataFrame indexed by calendar date, with one column per
# death-by-date snapshot file found in the data folder. Each column holds the
# 'Confirmed deaths' series from that file and is labelled with the
# snapshot's day and month (e.g. '5-May').

deaths = pd.DataFrame({'Date': pd.date_range(start='2020-03-01', end=datetime.date.today())})
deaths = deaths.set_index('Date')

path = 'data'

# os.listdir returns entries in arbitrary order, but `latest` and `compare`
# below are picked by column position. Sort the snapshots by the date
# embedded in the file name (the 10 characters before the 4-character
# extension) so the columns are always in chronological order.
snapshots = sorted(
    (pd.to_datetime(file[-14:-4]), os.path.join(path, file))
    for file in os.listdir(path)
    if file.startswith('Deaths by day stacked_20')
)
file_dates = [date for date, _ in snapshots]
file_names = [name for _, name in snapshots]

for date, name in zip(file_dates, file_names):
    # read Confirmed deaths, add as column to DataFrame
    temp_deaths = covid.read_deathdate_wi(name).set_index('Date')
    # Day without zero padding plus abbreviated month. Built from date.day
    # because the '%#d' strftime flag is only understood on Windows.
    # NOTE(review): the label has no year, so two snapshots taken on the same
    # day and month of different years would share a column - confirm this
    # cannot happen with the files in use.
    col_name = f"{date.day}-{date.strftime('%b')}"
    deaths[col_name] = temp_deaths['Confirmed deaths']

# The two most recent snapshots (columns are in chronological order).
col_names = deaths.columns
latest = col_names[-1]
compare = col_names[-2]
#%% Plots death by date comparisons
# First figure: the two most recent snapshots of deaths by date of death,
# together with how much the newer one changed relative to the older one.
diff_col = 'Latest difference'
deaths[diff_col] = deaths[latest].sub(deaths[compare])
deaths.plot(
    y=[latest, compare, diff_col],
    title=f'Date of Death, {latest} vs. {compare}',
)

# Second figure: deaths by date of death vs. deaths by date of report,
# both as 7-day rolling means - big delay here.
reported_deaths = state.set_index('Date')['Deaths']
deaths['Reported'] = reported_deaths
weekly_avg = deaths.rolling(7).mean()
weekly_avg.plot(
    y=[latest, 'Reported'],
    title='Date of Death vs. Report (7-day avg)',
)
#%% Plot delay in cases
# Compare cases with deaths that have been shifted earlier by `lag` days and
# divided by `cfr`, so both series can be drawn on the same axis. The figure
# is written to a PNG under docs/assets.

# Cases by test date for Wisconsin
# Paths are built with os.path.join (as in the death-by-date cell above) so
# the literals contain no backslash escape sequences.
cases_filename = os.path.join('data', 'Cases_with_prob_stacked_data_2021-04-15.csv')
death_filename = os.path.join('data', 'Deaths by day stacked_2021-05-05.csv')

# Milwaukee alternative:
# cases_filename = os.path.join('data', 'Cases_with_prob_stacked_data_Milwaukee_2021-02-24.csv')
# death_filename = os.path.join('data', 'Deaths by day stacked_Milwaukee_2021-02-24.csv')
# case_latest = 'Cases as of 27-Feb'
# death_latest = 'Deaths as of 27-Feb'

# Column names double as the legend labels in the plotly figure.
case_latest = 'Cases'
death_latest = 'Deaths, lagged<br>and scaled'

cases = pd.read_csv(cases_filename)

# filter out redundant data
# cases = cases.loc[cases['Measure Names'] == 'Confirmed cases']  # deprecated after file format change
cases = cases.iloc[0::3, :]  # every data point is reproduced three times

# keep only the date and case-count columns, under shorter names
col_rename = {'Day of Epi Dt': 'Date', 'Stacked Confirm + Probable cases': case_latest}
cases = cases[list(col_rename)]
cases = cases.rename(columns=col_rename)
cases['Date'] = pd.to_datetime(cases['Date'])
# Counts are read as text with thousands separators; strip the commas before
# converting. The vectorised .str accessor leaves missing values as NaN
# instead of raising on them.
cases[case_latest] = pd.to_numeric(cases[case_latest].str.replace(',', '', regex=False))

# add deaths; set index as date temporarily so they merge correctly
cases = cases.set_index('Date')
temp_deaths = covid.read_deathdate_wi(death_filename).set_index('Date')
cases[death_latest] = temp_deaths['Confirm + Probable deaths']

# add reported cases and deaths
cases['Cases (reported)'] = state.set_index('Date').Cases
cases['Deaths (reported)'] = state.set_index('Date').Deaths

# # switch to reported
# case_latest = 'Cases (reported)'
# death_latest = 'Deaths (reported)'

# state
lag = 14     # days the death series is moved earlier
cfr = 0.012  # divisor applied to deaths to bring them to the scale of cases

# # Milwaukee
# lag = 16
# cfr = 0.01

# Move every death count `lag` days earlier, then re-align on the date index
# and scale by 1 / cfr.
death2 = cases[death_latest].reset_index(drop=False)
death2['Date'] = death2['Date'] - datetime.timedelta(days=lag)
cases[death_latest] = death2.set_index('Date')[death_latest] / cfr

# 7-day rolling mean of every column, then restore Date as a regular column
# for plotting.
cases = cases.rolling(7).mean()
cases = cases.reset_index(drop=False)

# cases.plot(x='Date', y=[case_latest, 'Cases (reported)'])
# cases.plot(x='Date', y=[case_latest, death_latest])

fig = px.line(
    cases,
    x='Date',
    y=[case_latest, death_latest],
    color_discrete_sequence=['steelblue', 'firebrick'],
    # The subtitle is derived from lag and cfr so it cannot drift out of sync
    # with the values actually used above.
    title='Cases by test date vs Deaths by death date<br>'
          + f'(7-day avg, {lag}-day lag, CFR {cfr:.1%})',
    labels={'value': 'Cases / day'},
)
fig.update_layout(legend_title='')

pngfile = os.path.join('docs', 'assets', 'Cases-Deaths-Match_2021-04-15.png')
fig.write_image(
    pngfile,
    width=700,
    height=500,
    engine='kaleido',
)

# Open the saved image in the default viewer. os.startfile only exists on
# Windows, so skip this step elsewhere instead of raising AttributeError.
if hasattr(os, 'startfile'):
    os.startfile(pngfile)