# Invoice column names visible in this change; SUBSIDY_FIELD is the new one.
COST_FIELD = "Cost"
CREDIT_FIELD = "Credit"
CREDIT_CODE_FIELD = "Credit Code"
SUBSIDY_FIELD = "Subsidy"
BALANCE_FIELD = "Balance"


def export_BU_only(dataframe: pandas.DataFrame, output_file, subsidy_amount):
    """Write the Boston University invoice rows, with the subsidy applied, to a CSV.

    Rows whose INSTITUTION_FIELD equals "Boston University" are selected, a
    "Project" column is derived from PROJECT_FIELD by dropping everything
    from the last "-" onwards, and each PI receives up to ``subsidy_amount``
    of subsidy spread over their projects in row order.

    Args:
        dataframe: Invoice rows. Must contain the INSTITUTION_FIELD,
            PROJECT_FIELD, INVOICE_DATE_FIELD, PI_FIELD, COST_FIELD,
            CREDIT_FIELD and BALANCE_FIELD columns. It is not modified.
        output_file: Destination handed to ``DataFrame.to_csv``.
        subsidy_amount: Maximum total subsidy granted to each PI.

    Raises:
        ValueError: If ``subsidy_amount`` is negative.
    """
    is_bu = dataframe[INSTITUTION_FIELD] == "Boston University"
    # Explicit copy: columns are added below, and assigning into a filtered
    # slice is what triggers pandas' SettingWithCopyWarning.
    bu_projects = dataframe.loc[is_bu].copy()
    # Series.map always returns a Series, even when no BU rows exist.
    # DataFrame.apply(axis=1) hands back a DataFrame for an empty selection,
    # which cannot be assigned to a single column.
    bu_projects["Project"] = bu_projects[PROJECT_FIELD].map(_strip_allocation_suffix)
    bu_projects[SUBSIDY_FIELD] = 0
    bu_projects = bu_projects[
        [
            INVOICE_DATE_FIELD,
            PI_FIELD,
            "Project",
            COST_FIELD,
            CREDIT_FIELD,
            SUBSIDY_FIELD,
            BALANCE_FIELD,
        ]
    ].copy()
    bu_projects = _apply_subsidy(bu_projects, subsidy_amount)
    # The row index is still written as the first CSV column, as before.
    bu_projects.to_csv(output_file)


def _strip_allocation_suffix(project_alloc):
    """Return ``project_alloc`` without its last "-"-separated component.

    A name containing no "-" is returned unchanged.
    """
    return project_alloc.rsplit("-", 1)[0]


def _apply_subsidy(dataframe, subsidy_amount):
    """Distribute up to ``subsidy_amount`` per PI over that PI's rows.

    Rows are visited in frame order. Each row receives the smaller of its
    balance and what is left of its PI's subsidy; that amount is stored in
    SUBSIDY_FIELD and subtracted from BALANCE_FIELD. A non-positive balance
    receives no subsidy and leaves the PI's remaining subsidy untouched.

    Args:
        dataframe: Frame with PI_FIELD and BALANCE_FIELD columns. Its
            SUBSIDY_FIELD and BALANCE_FIELD columns are overwritten in place.
        subsidy_amount: Maximum total subsidy granted to each PI.

    Returns:
        The same ``dataframe`` object, updated.

    Raises:
        ValueError: If ``subsidy_amount`` is negative.
    """
    if subsidy_amount < 0:
        raise ValueError("subsidy_amount must not be negative")

    remaining_by_pi = {}
    subsidies = []
    balances = []
    # One pass over the rows. Whole columns are assigned at the end, so the
    # result does not depend on index labels being unique and the column
    # dtype simply follows the computed values (int, float, ...).
    for pi, balance in zip(dataframe[PI_FIELD], dataframe[BALANCE_FIELD]):
        remaining = remaining_by_pi.get(pi, subsidy_amount)
        # max(..., 0) stops a negative balance from producing a negative
        # subsidy, which would also inflate the PI's remaining amount.
        applied = min(max(balance, 0), remaining)
        remaining_by_pi[pi] = remaining - applied
        subsidies.append(applied)
        balances.append(balance - applied)

    dataframe[SUBSIDY_FIELD] = subsidies
    dataframe[BALANCE_FIELD] = balances
    return dataframe
def test_apply_BU_subsidy(self):
    """Check the BU-only export: columns, row filtering, names and subsidy split.

    Uses the fixture attributes set by setUp: ``self.dataframe``,
    ``self.output_file`` and ``self.subsidy``.
    """
    import os

    # setUp creates the output file with delete=False, so nothing removes it
    # unless the test does.
    self.addCleanup(os.remove, self.output_file)

    process_report.export_BU_only(self.dataframe, self.output_file, self.subsidy)
    output_df = pandas.read_csv(self.output_file)

    expected_columns = {
        process_report.INVOICE_DATE_FIELD,
        "Project",
        process_report.PI_FIELD,
        process_report.COST_FIELD,
        process_report.CREDIT_FIELD,
        process_report.SUBSIDY_FIELD,
        process_report.BALANCE_FIELD,
    }
    self.assertTrue(expected_columns.issubset(output_df.columns))

    exported_pis = output_df[process_report.PI_FIELD].unique()
    self.assertTrue({"PI1", "PI2", "PI4"}.issubset(exported_pis))
    # PI3 is the Harvard PI. This must be checked against the PI column;
    # looking it up among project names could never fail.
    self.assertNotIn("PI3", exported_pis)

    exported_projects = output_df["Project"].unique()
    self.assertTrue(
        {"ProjectA", "ProjectB", "ProjectE-test", "ProjectF"}.issubset(
            exported_projects
        )
    )
    self.assertFalse(
        {"ProjectC-t14334", "ProjectC", "ProjectD"}.intersection(exported_projects)
    )

    self.assertEqual(4, len(output_df.index))
    self.assertEqual(50, output_df.loc[0, "Subsidy"])
    self.assertEqual(0, output_df.loc[1, "Subsidy"])
    self.assertEqual(50, output_df.loc[2, "Subsidy"])
    self.assertEqual(50, output_df.loc[3, "Subsidy"])

    self.assertEqual(0, output_df.loc[0, "Balance"])
    self.assertEqual(0, output_df.loc[1, "Balance"])
    self.assertEqual(0, output_df.loc[2, "Balance"])
    self.assertEqual(50, output_df.loc[3, "Balance"])