-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpandas_test_kit_evaluator.py
108 lines (79 loc) · 3.4 KB
/
pandas_test_kit_evaluator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
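Pandas-backed test kit evaluator.

Loads per-county case counts from a CSV file and uses them to decide whether a
simulated test returns a positive result. Part of a project implementing an agent
that utilizes an epsilon-greedy algorithm to distribute test kits.

@author Joe McCall; Chris Zahuranec
@date 4/24/2020
@info Course CAP5600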
"""
from random import random
from typing import List
import pandas as pd
from county import County
from test_kit_evaluator import TestKitEvaluator
class PandasTestKitEvaluator(TestKitEvaluator):
    """Test kit evaluator driven by recorded case counts loaded with pandas.

    On construction the evaluator reads a CSV with ``county``, ``state``,
    ``date`` and ``cases`` columns and builds a table with one row per
    selected county, one column per date, and a final totals row labelled
    ``actual_positive_cases_key``.
    """

    # Counties this evaluator tracks; each must expose a ``name`` matching
    # the "<county>, <state>" labels built from the CSV.
    counties: List[County]
    # Row label of the per-date totals row appended to the data frame.
    actual_positive_cases_key = 'Actual Positive Cases (P_a)'

    def __init__(self, counties):
        """Store the counties and eagerly load the case-count table.

        :param counties: iterable of county objects exposing ``name``
        """
        self.counties = counties
        self.data_frame = self.get_data_frame()

    @staticmethod
    def _read_cases_csv(data_url):
        """Read the case CSV, skipping malformed lines with a warning."""
        try:
            # pandas >= 1.3; 'warn' matches the old error_bad_lines=False
            # combined with the default warn_bad_lines=True.
            return pd.read_csv(data_url, on_bad_lines='warn')
        except TypeError:
            # pandas < 1.3 does not know on_bad_lines.
            return pd.read_csv(data_url, error_bad_lines=False)

    def get_data_frame(self):
        """Build the county-by-date case table from the CSV file.

        :return: DataFrame indexed by county name (in the order of
            ``self.counties``) plus a totals row, with one column per date
            present in the data (sorted); missing values are 0
        :raises KeyError: if a selected county does not appear in the data
        """
        data_url = 'us-counties.csv'
        # data_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
        df = self._read_cases_csv(data_url)

        # create 'areas', county + state
        df['Area'] = df['county'] + ', ' + df['state']

        # obtain available dates in data (sorted, missing dates ignored)
        dates_list = sorted(df['date'].dropna().unique())

        # filter for counties in question, failing loudly on unknown names
        selected_counties = [c.name for c in self.counties]
        known_areas = set(df['Area'].dropna())
        missing = [name for name in selected_counties if name not in known_areas]
        if missing:
            raise KeyError(f'Counties not found in case data: {missing}')

        # Keep only the first record for each (area, date) pair, then reshape
        # in a single pass instead of re-filtering the frame per cell.
        subset = df[df['Area'].isin(selected_counties)].dropna(subset=['date'])
        subset = subset.drop_duplicates(subset=['Area', 'date'], keep='first')
        if subset.empty:
            pivoted = pd.DataFrame()
        else:
            pivoted = subset.pivot(index='Area', columns='date', values='cases')

        # Restore the requested row order and the full date range, replace
        # 'NaN' data points with 0's, and keep the CSV's numeric dtype.
        final_frame = pivoted.reindex(index=selected_counties, columns=dates_list)
        final_frame = final_frame.fillna(0).astype(df['cases'].dtype)

        # add row for totals used to generate graph
        final_frame.loc[self.actual_positive_cases_key] = final_frame.sum()
        return final_frame

    def return_final_counts(self):
        """Return the per-date totals row and cache it on ``self.final_total``.

        :return: Series of summed case counts, indexed by date
        """
        self.final_total = self.data_frame.loc[self.actual_positive_cases_key]
        return self.final_total

    def update_county_data(self, county, current_date):
        """Copy the recorded case count for a date onto the county object.

        :param county: the county to update; its ``name`` selects the row
        :param current_date: date-like object supporting ``strftime``; it is
            formatted as ``YYYY-MM-DD`` to select the column
        :return: None; sets ``county.num_actual_positive_cases``
        :raises KeyError: if the county or the date is not in the data frame
        """
        date_string = current_date.strftime('%Y-%m-%d')
        infected_population = self.data_frame.loc[county.name, date_string]

        # Update the actual positive cases on the county object from real data
        county.num_actual_positive_cases = infected_population

    def evaluate_test(self, county, current_date):
        """Randomly decide whether a single test in the county is positive.

        The probability of a positive result is the county's current
        ``num_actual_positive_cases`` divided by its ``population``.

        :param county: the county being tested
        :param current_date: unused here; kept for interface compatibility
        :return: True if the simulated test is positive, otherwise False
        """
        if not county.population:
            # No population means nobody to test positive; avoids dividing by zero.
            return False
        infection_percent = county.num_actual_positive_cases / county.population
        return random() < infection_percent