Skip to content

Commit 9bc423c

Browse files
committed
Added processing to apply project credits, determine the institution name for each PI, and export HU and BU invoices
1 parent 18c2770 commit 9bc423c

File tree

2 files changed

+218
-6
lines changed

2 files changed

+218
-6
lines changed

process_report/process_report.py

Lines changed: 155 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,106 @@
11
import argparse
22
import os
3+
import sys
34

45
import pandas
56

67

8+
### Invoice field names
9+
PROJECT_FIELD = 'Project - Allocation'
10+
MANAGER_PI_FIELD = 'Manager (PI)'
11+
INSTITUTION_FIELD = 'Institution'
12+
COST_FIELD = 'Cost'
13+
CREDIT_FIELD = 'Credit'
14+
CREDIT_CODE_FIELD = 'Credit Code'
15+
BALANCE_FIELD = 'Balance'
16+
###
17+
18+
19+
def apply_credits_0001(dataframe):
    """Placeholder for credit code "0001"; no credit logic is implemented yet.

    Credit functions are chained by reassigning the dataframe to their return
    value, so this placeholder hands the dataframe back unchanged instead of
    returning None (which would wipe out the dataframe if it were enabled).
    """
    return dataframe
22+
23+
24+
def _load_old_pis_0002():
    """Return the set of PI names listed in the file named by `C0002_OLD_PI`.

    The file is read one PI per line; surrounding whitespace and blank lines
    are ignored. Prints an error and exits with status 1 when the env var is
    unset or the file cannot be opened.
    """
    error_message = "Old PI file does not exist, or the C0002_OLD_PI env var is not set"

    old_pi_path = os.getenv("C0002_OLD_PI")
    if not old_pi_path:
        print(error_message)
        sys.exit(1)

    try:
        with open(old_pi_path) as f:
            return {line.strip() for line in f if line.strip()}
    except OSError:
        # Only file-access problems are reported this way; anything else is a
        # genuine bug and should surface with its traceback.
        print(error_message)
        sys.exit(1)


def apply_credits_0002(dataframe):
    """Applies the New PI Credit. This credit function expects the
    env var `C0002_OLD_PI` to be set, pointing to a txt file containing old PIs.

    Every PI that is not listed as an old PI gets up to `credit_amount` of
    credit, spent on that PI's projects in row order until it runs out. For
    each credited row the Credit, Credit Code and Balance columns are updated
    in place.

    Returns the same dataframe that was passed in.
    """
    credit_code = "0002"
    credit_amount = 1000

    old_pi_list = _load_old_pis_0002()
    print("Old pi list: ", old_pi_list)

    for pi in dataframe[MANAGER_PI_FIELD].unique():
        if pandas.isna(pi) or pi in old_pi_list:
            continue

        pi_projects = dataframe[dataframe[MANAGER_PI_FIELD] == pi]
        rem_credit = credit_amount
        for i, row in pi_projects.iterrows():
            project_cost = row[COST_FIELD]
            # A missing cost would turn the remaining credit into NaN (after
            # which every comparison fails and later rows would be credited in
            # full), and a zero/negative cost has nothing to credit.
            if pandas.isna(project_cost) or project_cost <= 0:
                continue

            applied = min(project_cost, rem_credit)
            dataframe.at[i, CREDIT_FIELD] = applied
            dataframe.at[i, CREDIT_CODE_FIELD] = credit_code
            dataframe.at[i, BALANCE_FIELD] = project_cost - applied

            rem_credit -= applied
            if rem_credit <= 0:
                break

    return dataframe
62+
63+
64+
# Credit functions that add_credits() runs, in list order. Each one takes the
# dataframe and its return value replaces the dataframe for the next one.
applied_credits = [apply_credits_0002]
65+
66+
67+
# Institutions keyed by email domain. A PI matches an entry when the domain of
# their address is that domain or one of its sub-domains.
_INSTITUTION_BY_DOMAIN = {
    "northeastern.edu": "Northeastern University",
    "bu.edu": "Boston University",
    "bentley.edu": "Bentley",
    "uri.edu": "University of Rhode Island",
    "redhat.com": "Red Hat",
    "childrens.harvard.edu": "Boston Childrens Hospital",
    "mclean.harvard.edu": "McLean Hospital",
    "meei.harvard.edu": "Massachusetts Eye & Ear",
    "dfci.harvard.edu": "Dana-Farber Cancer Institute",
    "bwh.harvard.edu": "Brigham and Women's Hospital",
    "bidmc.harvard.edu": "Beth Israel Deaconess Medical Center",
    "harvard.edu": "Harvard University",
    "wpi.edu": "Worcester Polytechnic Institute",
    "mit.edu": "Massachusetts Institute of Technology",
    "umass.edu": "University of Massachusetts Amherst",
    "uml.edu": "University of Massachusetts Lowell",
    "codeforboston.org": "Code For Boston",
    "yale.edu": "Yale University",
}

# Per-PI overrides, matched as a substring of the PI name.
_INSTITUTION_BY_USERNAME = {
    "mmsh": "Harvard University",
    "gstuart": "University of Massachusetts Amherst",
    "rudolph": "Boston Childrens Hospital",
    "robbaron": "Boston University",
    "kmdalton": "Harvard University",
    "mzink": "University of Massachusetts Amherst",
    "francesco.pontiggia": "Harvard University",
}


def get_institution_from_pi(pi_uname):
    """Return the institution name for a PI, or "" when none matches.

    The part of `pi_uname` after the last "@" (the whole string if there is
    no "@") is treated as an email domain and compared, case-insensitively,
    against the known domains on label boundaries. Matching whole domain
    labels rather than raw substrings keeps e.g. "x@missouri.edu" from being
    attributed to "uri.edu". The most specific domain wins, so
    "mclean.harvard.edu" is preferred over "harvard.edu".

    If no domain matches, the per-PI username overrides are tried as
    substrings of `pi_uname`. A message is printed when nothing matches.
    """
    domain = pi_uname.rpartition("@")[2].strip().lower()
    labels = domain.split(".")

    # Walk from the full domain to ever shorter suffixes, so the longest
    # (most specific) known domain is found first.
    for start in range(len(labels)):
        institute = _INSTITUTION_BY_DOMAIN.get(".".join(labels[start:]))
        if institute is not None:
            return institute

    for fragment, institute in _INSTITUTION_BY_USERNAME.items():
        if fragment in pi_uname:
            return institute

    print(f"PI name {pi_uname} does not match any institution!")
    return ""
102+
103+
7104
def main():
8105
"""Remove non-billable PIs and projects"""
9106

@@ -41,6 +138,18 @@ def main():
41138
default="pi_invoices",
42139
help="Name of output folder containing pi-specific invoice csvs"
43140
)
141+
parser.add_argument(
142+
"--HU-only",
143+
required=False,
144+
default="HU_only.csv",
145+
help="Name of output csv for HU invoices"
146+
)
147+
parser.add_argument(
148+
"--HU-BU",
149+
required=False,
150+
default="HU_BU.csv",
151+
help="Name of output csv for HU and BU invoices"
152+
)
44153
args = parser.parse_args()
45154
merged_dataframe = merge_csv(args.csv_files)
46155

@@ -60,9 +169,13 @@ def main():
60169

61170
projects = list(set(projects + timed_projects_list))
62171

172+
merged_dataframe = add_credits(merged_dataframe)
173+
merged_dataframe = add_institution(merged_dataframe)
63174
billable_projects = remove_non_billables(merged_dataframe, pi, projects, args.output_file)
64175
remove_billables(merged_dataframe, pi, projects, "non_billable.csv")
65176
export_pi_billables(billable_projects, args.output_folder)
177+
export_HU_only(billable_projects, args.HU_only)
178+
export_HU_BU(billable_projects, args.HU_BU)
66179

67180

68181
def merge_csv(files):
@@ -102,7 +215,7 @@ def timed_projects(timed_projects_file, invoice_date):
102215

103216
def remove_non_billables(dataframe, pi, projects, output_file):
    """Write the billable rows to `output_file` (without the index) and return them.

    A row is billable only when its PI is not listed in `pi` and its project
    is not listed in `projects`.
    """
    excluded_pi = dataframe[MANAGER_PI_FIELD].isin(pi)
    excluded_project = dataframe[PROJECT_FIELD].isin(projects)

    billable = dataframe[~(excluded_pi | excluded_project)]
    billable.to_csv(output_file, index=False)
    return billable
108221

@@ -112,21 +225,57 @@ def remove_billables(dataframe, pi, projects, output_file):
112225
113226
So this *keeps* the projects/pis that should not be billed.
114227
"""
115-
filtered_dataframe = dataframe[dataframe['Manager (PI)'].isin(pi) | dataframe['Project - Allocation'].isin(projects)]
228+
filtered_dataframe = dataframe[dataframe[MANAGER_PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects)]
116229
filtered_dataframe.to_csv(output_file, index=False)
117230

231+
118232
def export_pi_billables(dataframe: pandas.DataFrame, output_folder):
    """Write one invoice csv per PI into `output_folder`.

    The folder (and any missing parent) is created if needed. Each file is
    named `<institution>_<pi>_<invoice month>.csv`, where the institution is
    taken from the PI's first row and the invoice month from the first row of
    the dataframe. Rows without a PI are not exported. An empty dataframe
    produces no files.
    """
    os.makedirs(output_folder, exist_ok=True)

    if dataframe.empty:
        # Nothing to export, and there is no first row to read the month from.
        return

    invoice_month = dataframe['Invoice Month'].iat[0]

    for pi in dataframe[MANAGER_PI_FIELD].dropna().unique():
        pi_projects = dataframe[dataframe[MANAGER_PI_FIELD] == pi]
        pi_institution = pi_projects[INSTITUTION_FIELD].iat[0]
        file_name = f"{pi_institution}_{pi}_{invoice_month}.csv"
        pi_projects.to_csv(os.path.join(output_folder, file_name))
129-
244+
245+
246+
def add_credits(dataframe: pandas.DataFrame):
    """Add the Credit, Credit Code and Balance columns and apply all enabled credits.

    The three columns are inserted, in that order, directly after the Cost
    column of the given dataframe (which is modified in place). Credit starts
    at 0.0 and Credit Code at None. Balance starts at the row's Cost, so rows
    that no credit function touches still report what is owed rather than 0.

    Every function in `applied_credits` is then called in order, each
    receiving the dataframe returned by the previous one.

    Returns the dataframe produced by the last credit function.
    """
    cost_position = dataframe.columns.get_loc(COST_FIELD)
    dataframe.insert(cost_position + 1, CREDIT_FIELD, 0.0)
    dataframe.insert(cost_position + 2, CREDIT_CODE_FIELD, None)
    dataframe.insert(cost_position + 3, BALANCE_FIELD, dataframe[COST_FIELD])

    # Apply credits
    for credit_func in applied_credits:
        dataframe = credit_func(dataframe)

    return dataframe
257+
258+
259+
def add_institution(dataframe: pandas.DataFrame):
    """Fill the Institution column from each row's PI.

    Projects without a PI are reported with a printed message and keep
    whatever institution value they already had (missing if the column did
    not exist). Each distinct PI is resolved once through
    `get_institution_from_pi`, so an unmatched PI is reported once rather
    than once per row.

    The dataframe is modified in place and also returned.
    """
    pi_column = dataframe[MANAGER_PI_FIELD]
    has_pi = pi_column.notna()

    for project in dataframe.loc[~has_pi, PROJECT_FIELD]:
        print(f"Project {project} has no PI")

    institution_by_pi = {
        pi: get_institution_from_pi(pi) for pi in pi_column[has_pi].unique()
    }
    # Rows without a PI are absent from the lookup and therefore map to NaN.
    institutions = pi_column.map(institution_by_pi)
    if INSTITUTION_FIELD in dataframe.columns:
        # Preserve any pre-existing institution on rows that have no PI.
        institutions = institutions.where(has_pi, dataframe[INSTITUTION_FIELD])
    dataframe[INSTITUTION_FIELD] = institutions

    return dataframe
267+
268+
269+
def export_HU_only(dataframe, output_file):
    """Write the rows whose institution is Harvard University to `output_file`."""
    is_harvard = dataframe[INSTITUTION_FIELD].eq('Harvard University')
    dataframe[is_harvard].to_csv(output_file)
272+
273+
274+
def export_HU_BU(dataframe, output_file):
    """Write the rows whose institution is Harvard University or Boston University to `output_file`."""
    wanted_institutions = ['Harvard University', 'Boston University']
    selected = dataframe[INSTITUTION_FIELD].isin(wanted_institutions)
    dataframe[selected].to_csv(output_file)
278+
130279

131280
if __name__ == "__main__":
132281
main()

process_report/tests/unit_tests.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from unittest import TestCase
2+
from unittest import skipIf
23
import tempfile
34
import pandas
45
import os
@@ -175,3 +176,65 @@ def test_export_pi(self):
175176
self.assertNotIn('ProjectA', pi_df['Project - Allocation'].tolist())
176177
self.assertNotIn('ProjectB', pi_df['Project - Allocation'].tolist())
177178
self.assertNotIn('ProjectC', pi_df['Project - Allocation'].tolist())
179+
180+
181+
class TestGetInstitute(TestCase):
    """Checks the PI-to-institution lookup against a table of known answers."""

    def setUp(self):
        # One record per case: PI name, project, expected institution.
        records = [
            ('quanmp@bu.edu', 'ProjectA', "Boston University"),
            ('c@mclean.harvard.edu', 'ProjectB', "McLean Hospital"),
            ('b@harvard.edu', 'ProjectC', "Harvard University"),
            ('fake', 'ProjectD', ""),
            ('pi@northeastern.edu', 'ProjectE', "Northeastern University"),
        ]
        self.data = pandas.DataFrame(
            records,
            columns=['Manager (PI)', 'Project - Allocation', 'Answer'],
        )

    def test_get_pi_institution(self):
        cases = zip(self.data['Manager (PI)'], self.data['Answer'])
        for pi_name, expected in cases:
            found = process_report.get_institution_from_pi(pi_name)
            self.assertEqual(found, expected)
196+
197+
@skipIf(process_report.apply_credits_0002 not in process_report.applied_credits, "Skipping test for credit 0002 because credit not enabled")
class TestCredit0002(TestCase):
    """Checks the New PI credit (code 0002) as applied through add_credits."""

    def setUp(self):
        data = {
            'Manager (PI)': ['PI1', 'PI1', 'PI2', 'PI3', 'PI4', 'PI4'],
            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE', 'ProjectF'],
            'Cost': [10, 100, 10000, 5000, 800, 1000]
        }
        self.dataframe = pandas.DataFrame(data)

        old_pi = ['PI2', 'PI3']
        # The context manager closes (and so flushes) the file before the
        # code under test reads it, instead of relying on garbage collection.
        with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.csv') as old_pi_file:
            old_pi_file.writelines(pi + "\n" for pi in old_pi)
        self.old_pi_file = old_pi_file.name

        # Remember the previous value so the env var does not leak into other tests.
        self._previous_old_pi_env = os.environ.get("C0002_OLD_PI")
        os.environ["C0002_OLD_PI"] = self.old_pi_file

    def tearDown(self):
        os.remove(self.old_pi_file)
        if self._previous_old_pi_env is None:
            os.environ.pop("C0002_OLD_PI", None)
        else:
            os.environ["C0002_OLD_PI"] = self._previous_old_pi_env

    def test_apply_credit_0002(self):
        dataframe = process_report.add_credits(self.dataframe)

        self.assertTrue('Credit' in dataframe)
        self.assertTrue('Credit Code' in dataframe)
        self.assertTrue('Balance' in dataframe)

        credited_projects = dataframe[dataframe['Credit Code'] == '0002']

        def credited_value(project, column):
            """Value of `column` on the credited row of `project`."""
            rows = credited_projects[credited_projects['Project - Allocation'] == project]
            return rows[column].iloc[0]

        self.assertEqual(4, len(credited_projects.index))
        self.assertTrue('PI2' not in credited_projects['Manager (PI)'].unique())
        self.assertTrue('PI3' not in credited_projects['Manager (PI)'].unique())

        self.assertEqual(10, credited_value('ProjectA', 'Credit'))
        self.assertEqual(100, credited_value('ProjectB', 'Credit'))
        self.assertEqual(800, credited_value('ProjectE', 'Credit'))
        self.assertEqual(200, credited_value('ProjectF', 'Credit'))

        self.assertEqual(0, credited_value('ProjectA', 'Balance'))
        self.assertEqual(0, credited_value('ProjectB', 'Balance'))
        self.assertEqual(0, credited_value('ProjectE', 'Balance'))
        self.assertEqual(800, credited_value('ProjectF', 'Balance'))
239+
240+

0 commit comments

Comments
 (0)