-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWorkTruePrevalence.py
120 lines (84 loc) · 2.5 KB
/
WorkTruePrevalence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""
Work on Coronavirus data analysis
Script for downloading, parsing, plotting Covid data from Wisconsin.
"""
path = 'C:/dev/Covid/'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import covid
import urllib
from scipy import signal
#%% Get the data
# First retrieve data from server and save to csv file
# Second read data from the previously saved csv file
# comment sections if no need to re-download
csv_file_covid = path + 'Covid-Data-WI.csv'
csv_file_pop = path + 'Population-Data-WI.csv'
# population data
# covid.download_pop_data_wi(csv_file_pop)
popdata = covid.read_pop_data_wi(csv_file_pop)
# covid data
covid.download_covid_data_wi(csv_file_covid)
widata = covid.read_covid_data_wi(csv_file_covid)
#%% Try to estimate true prevalence
loc = 'WI'
select = covid.select_data(widata, loc)
avg = select.rolling(window=7, center=True).mean()
# offset tests by ten days?
cases = avg.POS_NEW
tests = avg.TEST_NEW
# tests.index -= pd.DateOffset(days=10)
pos_rate = cases/tests
plt.figure()
plt.plot(tests, pos_rate, '.:')
plt.figure()
plt.plot(cases*10)
plt.plot(tests)
plt.plot(500 * cases / np.sqrt(tests))
#%% Try correcting for test result reporting postponement by forward convolving
t = tests.to_numpy().copy()
# fill in NaNs at the end with copies
t[-3:] = t[-4]
t[0:4] = t[4]
# two sided exponential
k = 7
x0 = 8
n = 20
r = np.concatenate((np.arange(1,n-x0), np.arange(n-x0,n-2*x0-1,-1)))
test_response = np.exp(r/k)
test_response = test_response / np.sum(test_response)
t = np.concatenate((t, np.ones(n)*t[-1]))
x = np.arange(0,len(t))
t2 = signal.convolve(t, test_response, 'valid')
x2 = np.arange(0,len(t2))
plt.figure()
plt.plot(x,t)
plt.plot(x2, t2)
#%% Similar plots with modified test curve
tests2 = pd.Series(data=t2[0:-1], index=tests.index)
pos_rate2 = cases/tests2
plt.figure()
plt.plot(tests2, pos_rate2, '.:')
plt.figure()
plt.plot(cases*10)
plt.plot(tests2)
plt.plot(500 * cases / np.sqrt(tests2))
#%% Try deconvolving with some kind of reporting response
# Doesn't seem to work very well.
# test_response = np.arange(14, 0, -1)
# test_response[0] *= 2
# test_response = np.ones(10)
# test_response = signal.windows.triang(11)
test_response = np.exp(-np.arange(0,11)/5)
test_response = test_response / np.sum(test_response)
t = tests.to_numpy()
x = np.arange(0,len(t))
t[np.isnan(t)] = 0
recovered, remainder = signal.deconvolve(t, test_response)
plt.figure()
plt.plot(t)
plt.plot(recovered)
plt.plot(remainder)