1
1
import argparse
2
2
import os
3
+ import sys
3
4
4
5
import pandas
5
6
6
7
8
### Invoice field names
# Column headers of the invoice dataframe, kept in one place so every
# function in this module refers to the same spelling.
PROJECT_FIELD = 'Project - Allocation'
MANAGER_PI_FIELD = 'Manager (PI)'
INSTITUTION_FIELD = 'Institution'
COST_FIELD = 'Cost'
# The three columns below are inserted by add_credits, right after the cost column.
CREDIT_FIELD = 'Credit'
CREDIT_CODE_FIELD = 'Credit Code'
BALANCE_FIELD = 'Balance'
###
17
+
18
+
19
def apply_credits_0001(dataframe):
    """Placeholder for credit code "0001"; no credit logic is implemented yet.

    The dataframe is returned unchanged. add_credits rebinds the dataframe to
    the return value of every credit function it runs, so returning nothing
    here would replace the whole invoice with None as soon as this function
    is registered in `applied_credits`.
    """
    return dataframe
22
+
23
+
24
def apply_credits_0002(dataframe):
    """Applies the New PI Credit (credit code "0002").

    Every PI that is not listed in the old-PI file receives a credit of up to
    1000, spent on that PI's projects in row order until it is used up. The
    credit and credit-code columns are filled in for the projects that
    consumed credit, and the balance column is set to cost minus credit for
    every row, so rows that received no credit carry their full cost.

    This credit function expects the env var `C0002_OLD_PI` to be set,
    pointing to a txt file containing old PIs, one per line. If the variable
    is unset or the file cannot be opened, a message is printed and the
    program exits with status 1.

    Returns the same dataframe, modified in place.
    """
    credit_code = "0002"
    credit_amount = 1000

    old_pi_list = None
    old_pi_file = os.getenv("C0002_OLD_PI")
    if old_pi_file:
        try:
            with open(old_pi_file) as f:
                old_pi_list = {line.strip() for line in f}
        except OSError:
            # Reported below, together with the "variable not set" case.
            old_pi_list = None

    if old_pi_list is None:
        print("Old PI file does not exist, or the C0002_OLD_PI env var is not set")
        sys.exit(1)

    print("Old pi list: ", old_pi_list)

    for pi in dataframe[MANAGER_PI_FIELD].unique():
        if pi != pi:  # NaN check
            continue
        if pi in old_pi_list:  # Old PIs are not eligible
            continue

        rem_credit = credit_amount
        pi_projects = dataframe[dataframe[MANAGER_PI_FIELD] == pi]
        for i, row in pi_projects.iterrows():
            if rem_credit <= 0:
                break

            project_cost = row[COST_FIELD]
            if project_cost != project_cost:
                # A missing (NaN) cost would turn the remaining credit into
                # NaN and disable the limit for the following projects.
                continue

            applied = min(project_cost, rem_credit)
            dataframe.at[i, CREDIT_FIELD] = applied
            dataframe.at[i, CREDIT_CODE_FIELD] = credit_code
            rem_credit -= applied

    # Derive the balance for all rows at once. Rows that got no credit (old
    # PIs, rows without a PI, projects after the credit ran out) would
    # otherwise keep whatever placeholder the column was created with.
    dataframe[BALANCE_FIELD] = dataframe[COST_FIELD] - dataframe[CREDIT_FIELD]

    return dataframe
62
+
63
+
64
# Credit functions run by add_credits, in list order. Each one receives the
# invoice dataframe and must return it.
applied_credits = [apply_credits_0002]
65
+
66
+
67
# Institution for each known e-mail domain. Sub-domains that are not listed
# resolve to their closest listed parent domain.
_INSTITUTION_BY_DOMAIN = {
    "northeastern.edu": "Northeastern University",
    "bu.edu": "Boston University",
    "bentley.edu": "Bentley",
    "uri.edu": "University of Rhode Island",
    "redhat.com": "Red Hat",
    "childrens.harvard.edu": "Boston Childrens Hospital",
    "mclean.harvard.edu": "McLean Hospital",
    "meei.harvard.edu": "Massachusetts Eye & Ear",
    "dfci.harvard.edu": "Dana-Farber Cancer Institute",
    "bwh.harvard.edu": "Brigham and Women's Hospital",
    "bidmc.harvard.edu": "Beth Israel Deaconess Medical Center",
    "harvard.edu": "Harvard University",
    "wpi.edu": "Worcester Polytechnic Institute",
    "mit.edu": "Massachusetts Institute of Technology",
    "umass.edu": "University of Massachusetts Amherst",
    "uml.edu": "University of Massachusetts Lowell",
    "codeforboston.org": "Code For Boston",
    "yale.edu": "Yale University",
}

# PIs recognised by user name rather than by domain.
_INSTITUTION_BY_USERNAME = {
    "mmsh": "Harvard University",
    "gstuart": "University of Massachusetts Amherst",
    "rudolph": "Boston Childrens Hospital",
    "robbaron": "Boston University",
    "kmdalton": "Harvard University",
    "mzink": "University of Massachusetts Amherst",
    "francesco.pontiggia": "Harvard University",
}


def get_institution_from_pi(pi_uname):
    """Return the institution name for a PI user name, or "" if none matches.

    The text after the last "@" (the whole name when there is no "@") is
    treated as a domain and compared, case-insensitively, against the known
    domains: first the full domain, then each parent domain in turn. Matching
    whole domain labels keeps look-alike domains apart; "missouri.edu"
    contains the text "uri.edu" but is not that domain.

    If no domain matches, the known user names are searched for anywhere in
    `pi_uname`. When nothing matches, or `pi_uname` is not a string, a
    message is printed and "" is returned.
    """
    if isinstance(pi_uname, str):
        candidate = pi_uname.rpartition("@")[2].strip().lower()
        while candidate:
            institute = _INSTITUTION_BY_DOMAIN.get(candidate)
            if institute is not None:
                return institute
            # Drop the left-most label and retry with the parent domain.
            candidate = candidate.partition(".")[2]

        for name, institute in _INSTITUTION_BY_USERNAME.items():
            if name in pi_uname:
                return institute

    print(f"PI name {pi_uname} does not match any institution!")
    return ""
102
+
103
+
7
104
def main ():
8
105
"""Remove non-billable PIs and projects"""
9
106
@@ -41,6 +138,18 @@ def main():
41
138
default = "pi_invoices" ,
42
139
help = "Name of output folder containing pi-specific invoice csvs"
43
140
)
141
+ parser .add_argument (
142
+ "--HU-only" ,
143
+ required = False ,
144
+ default = "HU_only.csv" ,
145
+ help = "Name of output csv for HU invoices"
146
+ )
147
+ parser .add_argument (
148
+ "--HU-BU" ,
149
+ required = False ,
150
+ default = "HU_BU.csv" ,
151
+ help = "Name of output csv for HU and BU invoices"
152
+ )
44
153
args = parser .parse_args ()
45
154
merged_dataframe = merge_csv (args .csv_files )
46
155
@@ -60,9 +169,13 @@ def main():
60
169
61
170
projects = list (set (projects + timed_projects_list ))
62
171
172
+ merged_dataframe = add_credits (merged_dataframe )
173
+ merged_dataframe = add_institution (merged_dataframe )
63
174
billable_projects = remove_non_billables (merged_dataframe , pi , projects , args .output_file )
64
175
remove_billables (merged_dataframe , pi , projects , "non_billable.csv" )
65
176
export_pi_billables (billable_projects , args .output_folder )
177
+ export_HU_only (billable_projects , args .HU_only )
178
+ export_HU_BU (billable_projects , args .HU_BU )
66
179
67
180
68
181
def merge_csv (files ):
@@ -102,7 +215,7 @@ def timed_projects(timed_projects_file, invoice_date):
102
215
103
216
def remove_non_billables(dataframe, pi, projects, output_file):
    """Removes projects and PIs that should not be billed from the dataframe

    A row is dropped when its PI is in `pi` or its project is in `projects`.
    The remaining rows are written to `output_file` (without the index) and
    returned.
    """
    is_non_billable = dataframe[MANAGER_PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects)
    filtered_dataframe = dataframe[~is_non_billable]
    filtered_dataframe.to_csv(output_file, index=False)
    return filtered_dataframe
108
221
@@ -112,21 +225,57 @@ def remove_billables(dataframe, pi, projects, output_file):
112
225
113
226
So this *keeps* the projects/pis that should not be billed.
114
227
"""
115
- filtered_dataframe = dataframe [dataframe ['Manager (PI)' ].isin (pi ) | dataframe ['Project - Allocation' ].isin (projects )]
228
+ filtered_dataframe = dataframe [dataframe [MANAGER_PI_FIELD ].isin (pi ) | dataframe [PROJECT_FIELD ].isin (projects )]
116
229
filtered_dataframe .to_csv (output_file , index = False )
117
230
231
+
118
232
def export_pi_billables(dataframe: pandas.DataFrame, output_folder):
    """Write one invoice CSV per PI into `output_folder`.

    The folder is created if it does not exist. Each file is named
    "<institution>_<PI>_<invoice month>.csv", using the institution of the
    PI's first row and the invoice month of the dataframe's first row. Rows
    without a PI are not exported. An empty dataframe produces no files.
    """
    os.makedirs(output_folder, exist_ok=True)

    if dataframe.empty:
        # Nothing to export, and .iat[0] below would raise IndexError.
        return

    invoice_month = dataframe['Invoice Month'].iat[0]

    # groupby leaves out rows whose PI is NaN; sort=False keeps the PIs in
    # order of first appearance.
    for pi, pi_projects in dataframe.groupby(MANAGER_PI_FIELD, sort=False):
        pi_instituition = pi_projects[INSTITUTION_FIELD].iat[0]
        # NOTE(review): the dataframe index is written as the first CSV
        # column here, unlike remove_non_billables; confirm that is intended.
        pi_projects.to_csv(output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv")
129
-
244
+
245
+
246
def add_credits(dataframe: pandas.DataFrame):
    """Adds the credit columns to the invoice and applies the registered credits

    Three columns are inserted directly after the cost column, in this order:
    credit (0.0), credit code (empty) and balance. The balance starts out
    equal to the cost, so a row that no credit function touches is billed in
    full. Every function in `applied_credits` is then called with the
    dataframe, in list order, and its return value becomes the dataframe
    passed on to the next one.

    Returns the dataframe produced by the last credit function.
    """
    cost_position = dataframe.columns.get_loc(COST_FIELD)
    dataframe.insert(cost_position + 1, CREDIT_FIELD, 0.0)
    dataframe.insert(cost_position + 2, CREDIT_CODE_FIELD, None)
    # Copy the cost values so later edits to the balance never alias the cost.
    dataframe.insert(cost_position + 3, BALANCE_FIELD, dataframe[COST_FIELD].to_numpy(copy=True))

    # Apply credits
    for credit_func in applied_credits:
        dataframe = credit_func(dataframe)

    return dataframe
257
+
258
+
259
def add_institution(dataframe: pandas.DataFrame):
    """Determine each PI's institution name and store it in the institution column

    Rows without a PI are reported with a message and their institution is
    left untouched. Every distinct PI is resolved once through
    get_institution_from_pi, so a PI whose institution cannot be determined
    is logged once rather than once per project.

    Returns the same dataframe, modified in place.
    """
    pi_names = dataframe[MANAGER_PI_FIELD]
    has_pi = pi_names.notna()

    for project in dataframe.loc[~has_pi, PROJECT_FIELD]:
        print(f"Project {project} has no PI")

    institution_by_pi = {pi: get_institution_from_pi(pi) for pi in pi_names[has_pi].unique()}
    dataframe.loc[has_pi, INSTITUTION_FIELD] = pi_names[has_pi].map(institution_by_pi)

    return dataframe
267
+
268
+
269
def export_HU_only(dataframe, output_file):
    """Write the rows whose institution is 'Harvard University' to `output_file`"""
    is_harvard = dataframe[INSTITUTION_FIELD] == 'Harvard University'
    # NOTE(review): the dataframe index is written as the first CSV column,
    # unlike remove_non_billables; confirm that is intended.
    dataframe[is_harvard].to_csv(output_file)
272
+
273
+
274
def export_HU_BU(dataframe, output_file):
    """Write the rows whose institution is 'Harvard University' or 'Boston University' to `output_file`"""
    is_hu_or_bu = dataframe[INSTITUTION_FIELD].isin(['Harvard University', 'Boston University'])
    # NOTE(review): the dataframe index is written as the first CSV column,
    # unlike remove_non_billables; confirm that is intended.
    dataframe[is_hu_or_bu].to_csv(output_file)
278
+
130
279
131
280
if __name__ == "__main__" :
132
281
main ()
0 commit comments