From 852589143b426a6361aa80df43ae40458a1df5e1 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Thu, 14 Nov 2024 11:24:12 -0500 Subject: [PATCH] Implemented processor for prepayments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepayments have been implemented by a new processor, `PrepaymentProcessor`. The full implementation for prepayments involved the following changes: - Several new column names have been added to `invoices/invoice.py`, including column names for the prepay data files and for the exported invoices - Many invoices will now export with 4 new columns: `Prepaid Group Name`, `Prepaid Group Institution`, `Prepaid Group Balance`, `Prepaid Group Used` - 4 command line arguments have been added to `process_report.py`. 3 of them allow the user to pass in a local version of the prepay credits, contacts, and projects files. The last one (`--prepay-debits`) allows passing a local version of the prepay debits file, and defaults to fetching from S3 if not provided - A set of test cases has been added for `PrepaymentProcessor` Since the implementation of this feature required a lot of logic decisions (e.g. what happens if a prepaid project is active, but has no funds?), below is (hopefully) an exhaustive list of code logic decisions that were made. These can also be inferred through the test cases. - Prepay projects listed in `prepaid_projects.csv` are identified by their project name, not project - allocation name - Attempting to process past invoices (“backprocessing”) may result in incorrect output due to the nature of the prepay debit ledger - While backprocessing is not supported for past months, processing the same invoice month twice will still return correct output. In this case, the month’s debit entry will be overwritten - Prepay balances can be used in the same month they are added. 
- The time range in which prepay projects are considered “active” includes their start and end date - After processing of any given invoice month, debit entries for that month will be added. I emphasize this for clarification. A debit entry such as: `2024-11,G1,1062.48` Should be interpreted as: In the period from 2024-11-01 to 2024-11-30, prepay group G1 spent $1062.48 As opposed to: In the period from 2024-10-01 to 2024-10-31, … - If prepay projects are “active” but their prepay group has $0 balance, their prepay info (group name, contact email) is still included, but the prepay balance will be displayed as $0 and the prepay used as an empty field --- process_report/invoices/NERC_total_invoice.py | 4 + process_report/invoices/billable_invoice.py | 4 + .../invoices/bu_internal_invoice.py | 4 + process_report/invoices/invoice.py | 17 + .../invoices/pi_specific_invoice.py | 4 + process_report/process_report.py | 65 ++- .../processors/prepayment_processor.py | 217 +++++++++ process_report/tests/unit_tests.py | 425 ++++++++++++++++++ process_report/tests/util.py | 29 ++ process_report/util.py | 22 + 10 files changed, 788 insertions(+), 3 deletions(-) create mode 100644 process_report/processors/prepayment_processor.py diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py index e39e49b..a3ca5e9 100644 --- a/process_report/invoices/NERC_total_invoice.py +++ b/process_report/invoices/NERC_total_invoice.py @@ -30,7 +30,11 @@ class NERCTotalInvoice(invoice.Invoice): invoice.SU_HOURS_FIELD, invoice.SU_TYPE_FIELD, invoice.RATE_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, invoice.CREDIT_FIELD, invoice.CREDIT_CODE_FIELD, invoice.BALANCE_FIELD, diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index 8bab38a..281b68c 100644 --- 
a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -35,7 +35,11 @@ class BillableInvoice(invoice.Invoice): invoice.SU_HOURS_FIELD, invoice.SU_TYPE_FIELD, invoice.RATE_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, invoice.CREDIT_FIELD, invoice.CREDIT_CODE_FIELD, invoice.BALANCE_FIELD, diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index 0db03b9..e028dc4 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -15,7 +15,11 @@ class BUInternalInvoice(invoice.Invoice): invoice.INVOICE_DATE_FIELD, invoice.PI_FIELD, "Project", + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, invoice.CREDIT_FIELD, invoice.SUBSIDY_FIELD, invoice.PI_BALANCE_FIELD, diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 47797c8..cdfbe11 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -12,6 +12,18 @@ PI_2ND_USED = "2nd Month Used" ### +### Prepay files fields +PREPAY_MONTH_FIELD = "Month" +PREPAY_CREDIT_FIELD = "Credit" +PREPAY_DEBIT_FIELD = "Debit" +PREPAY_GROUP_NAME_FIELD = "Group Name" +PREPAY_GROUP_CONTACT_FIELD = "Group Contact Email" +PREPAY_MANAGED_FIELD = "MGHPCC Managed" +PREPAY_PROJECT_FIELD = "Project" +PREPAY_START_DATE_FIELD = "Start Date" +PREPAY_END_DATE_FIELD = "End Date" +### + ### Invoice field names INVOICE_DATE_FIELD = "Invoice Month" PROJECT_FIELD = "Project - Allocation" @@ -21,6 +33,10 @@ INVOICE_ADDRESS_FIELD = "Invoice Address" INSTITUTION_FIELD = "Institution" INSTITUTION_ID_FIELD = "Institution - Specific Code" +GROUP_NAME_FIELD = "Prepaid Group Name" +GROUP_INSTITUTION_FIELD = "Prepaid Group Institution" 
+GROUP_BALANCE_FIELD = "Prepaid Group Balance" +GROUP_BALANCE_USED_FIELD = "Prepaid Group Used" SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)" SU_TYPE_FIELD = "SU Type" SU_CHARGE_FIELD = "SU Charge" @@ -38,6 +54,7 @@ MISSING_PI_FIELD = "Missing PI" PI_BALANCE_FIELD = "PI Balance" PROJECT_NAME_FIELD = "Project" +GROUP_MANAGED_FIELD = "MGHPCC Managed" ### diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index 5fc2e1a..0df6bdb 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -27,7 +27,11 @@ class PIInvoice(invoice.Invoice): invoice.SU_HOURS_FIELD, invoice.SU_TYPE_FIELD, invoice.RATE_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_BALANCE_FIELD, invoice.COST_FIELD, + invoice.GROUP_BALANCE_USED_FIELD, invoice.CREDIT_FIELD, invoice.CREDIT_CODE_FIELD, invoice.BALANCE_FIELD, diff --git a/process_report/process_report.py b/process_report/process_report.py index 5a1ed77..affa1f2 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -22,6 +22,7 @@ validate_billable_pi_processor, new_pi_credit_processor, bu_subsidy_processor, + prepayment_processor, ) ### PI file field names @@ -53,9 +54,8 @@ ### PI_S3_FILEPATH = "PIs/PI.csv" - - ALIAS_S3_FILEPATH = "PIs/alias.csv" +PREPAY_DEBITS_S3_FILEPATH = "Prepay/prepay_debits.csv" def load_alias(alias_file): @@ -73,6 +73,14 @@ def load_alias(alias_file): return alias_dict +def load_prepay_csv(prepay_credits_path, prepay_projects_path, prepay_contacts_path): + return ( + pandas.read_csv(prepay_credits_path), + pandas.read_csv(prepay_projects_path), + pandas.read_csv(prepay_contacts_path), + ) + + def get_iso8601_time(): return datetime.datetime.now().strftime("%Y%m%dT%H%M%SZ") @@ -117,6 +125,24 @@ def main(): required=True, help="File containing list of projects that are non-billable within a specified duration", ) + parser.add_argument( + 
"--prepay-credits", + required=False, + default="prepaid_credits.csv", + help="CSV listing all prepay group credits. Defaults to 'prepaid_credits.csv'", + ) + parser.add_argument( + "--prepay-projects", + required=False, + default="prepaid_projects.csv", + help="CSV listing all prepay group projects. Defaults to 'prepaid_projects.csv'", + ) + parser.add_argument( + "--prepay-contacts", + required=False, + default="prepaid_contacts.csv", + help="CSV listing all prepay group contact information. Defaults to 'prepaid_contacts.csv'", + ) parser.add_argument( "--nonbillable-file", @@ -164,6 +190,11 @@ def main(): required=False, help="Name of alias file listing PIs with aliases (and their aliases). If not provided, defaults to fetching from S3", ) + parser.add_argument( + "--prepay-debits", + required=False, + help="Name of csv file listing all prepay group debits. If not provided, defaults to fetching from S3", + ) parser.add_argument( "--BU-subsidy-amount", required=True, @@ -190,6 +221,15 @@ def main(): alias_file = fetch_s3_alias_file() alias_dict = load_alias(alias_file) + if args.prepay_debits: + prepay_debits_filepath = args.prepay_debits + else: + prepay_debits_filepath = fetch_s3_prepay_debits() + + prepay_credits, prepay_projects, prepay_info = load_prepay_csv( + args.prepay_credits, args.prepay_projects, args.prepay_contacts + ) + merged_dataframe = merge_csv(csv_files) pi = [] @@ -248,7 +288,19 @@ def main(): ) bu_subsidy_proc.process() - processed_data = bu_subsidy_proc.data + prepayment_proc = prepayment_processor.PrepaymentProcessor( + "", + invoice_month, + bu_subsidy_proc.data, + prepay_credits, + prepay_projects, + prepay_info, + prepay_debits_filepath, + args.upload_to_s3, + ) + prepayment_proc.process() + + processed_data = prepayment_proc.data ### Initialize invoices @@ -378,6 +430,13 @@ def fetch_s3_old_pi_file(): return local_name +def fetch_s3_prepay_debits(): + local_name = "prepay_debits.csv" + invoice_bucket = util.get_invoice_bucket() + 
invoice_bucket.download_file(PREPAY_DEBITS_S3_FILEPATH, local_name) + return local_name + + def backup_to_s3_old_pi_file(old_pi_file): invoice_bucket = util.get_invoice_bucket() invoice_bucket.upload_file(old_pi_file, f"PIs/Archive/PI {get_iso8601_time()}.csv") diff --git a/process_report/processors/prepayment_processor.py b/process_report/processors/prepayment_processor.py new file mode 100644 index 0000000..e8ee863 --- /dev/null +++ b/process_report/processors/prepayment_processor.py @@ -0,0 +1,217 @@ +import sys +import logging +from dataclasses import dataclass + +import pandas + +from process_report import util +from process_report.invoices import invoice +from process_report.processors import discount_processor + + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +@dataclass +class PrepaymentProcessor(discount_processor.DiscountProcessor): + IS_DISCOUNT_BY_NERC = True + PREPAY_DEBITS_S3_FILEPATH = "Prepay/prepay_debits.csv" + + @property + def PREPAY_DEBITS_S3_BACKUP_FILEPATH(self): + return f"Prepay/Archive/prepay_debits {util.get_iso8601_time()}.csv" + + prepay_credits: pandas.DataFrame + prepay_projects: pandas.DataFrame + prepay_contacts: pandas.DataFrame + prepay_debits_filepath: str + upload_to_s3: bool + + @staticmethod + def _load_prepay_debits(prepay_debits_filepath): + try: + prepay_debits = pandas.read_csv(prepay_debits_filepath) + except FileNotFoundError: + sys.exit("Applying prepayments failed. 
prepay debits file does not exist") + + return prepay_debits + + def _prepare(self): + self.data[invoice.GROUP_NAME_FIELD] = None + self.data[invoice.GROUP_INSTITUTION_FIELD] = None + self.data[invoice.GROUP_MANAGED_FIELD] = None + self.data[invoice.GROUP_BALANCE_FIELD] = None + self.data[invoice.GROUP_BALANCE_USED_FIELD] = None + + self.prepay_debits = self._load_prepay_debits(self.prepay_debits_filepath) + self.group_info_dict = self._get_prepay_group_dict() + if self.upload_to_s3: + self._backup_s3_prepay_debits() + + def _process(self): + self._add_prepay_info() + self._apply_prepayments() + + self._export_prepay_debits() + if self.upload_to_s3: + self._export_s3_prepay_debits() + + def _get_prepay_group_dict(self): + """Loads prepay info into a dict for simpler indexing + during processing step""" + prepay_group_dict = dict() + + # Load each group's contact info, and initialize $0 balance and empty project list + for _, group_info in self.prepay_contacts.iterrows(): + group_name = group_info[invoice.PREPAY_GROUP_NAME_FIELD] + prepay_group_dict[group_name] = dict() + prepay_group_dict[group_name][ + invoice.PREPAY_GROUP_CONTACT_FIELD + ] = group_info[invoice.PREPAY_GROUP_CONTACT_FIELD] + prepay_group_dict[group_name][invoice.PREPAY_MANAGED_FIELD] = group_info[ + invoice.PREPAY_MANAGED_FIELD + ] + prepay_group_dict[group_name][invoice.GROUP_BALANCE_FIELD] = 0 + prepay_group_dict[group_name][invoice.PREPAY_PROJECT_FIELD] = [] + + # Sum up each group's credits from current and past months + for _, group_credit in self.prepay_credits.iterrows(): + if ( + util.get_month_diff( + self.invoice_month, group_credit[invoice.PREPAY_MONTH_FIELD] + ) + >= 0 + ): + prepay_group_dict[group_credit[invoice.PREPAY_GROUP_NAME_FIELD]][ + invoice.GROUP_BALANCE_FIELD + ] += group_credit[invoice.PREPAY_CREDIT_FIELD] + + # Sum up each group's debits from past months. 
DOES NOT INCLUDE CURRENT MONTH + for _, group_debit in self.prepay_debits.iterrows(): + if ( + util.get_month_diff( + self.invoice_month, group_debit[invoice.PREPAY_MONTH_FIELD] + ) + > 0 + ): + prepay_group_dict[group_debit[invoice.PREPAY_GROUP_NAME_FIELD]][ + invoice.GROUP_BALANCE_FIELD + ] -= group_debit[invoice.PREPAY_DEBIT_FIELD] + + if ( + prepay_group_dict[group_debit[invoice.PREPAY_GROUP_NAME_FIELD]][ + invoice.GROUP_BALANCE_FIELD + ] + < 0 + ): + logger.error( + f"Balance for prepay group {group_credit[invoice.PREPAY_GROUP_NAME_FIELD]} is negative!" + ) + sys.exit(1) + + # Populate each group's list of "active" prepay projects + # Projects' "active" period includes their start and end dates + for _, group_project in self.prepay_projects.iterrows(): + if ( + util.get_month_diff( + self.invoice_month, group_project[invoice.PREPAY_START_DATE_FIELD] + ) + >= 0 + and util.get_month_diff( + group_project[invoice.PREPAY_END_DATE_FIELD], self.invoice_month + ) + >= 0 + ): + prepay_group_dict[group_project[invoice.PREPAY_GROUP_NAME_FIELD]][ + invoice.PREPAY_PROJECT_FIELD + ].append(group_project[invoice.PREPAY_PROJECT_FIELD]) + + return prepay_group_dict + + def _add_prepay_info(self): + """Populate prepaid group name, institute, and MGHPCC managed field""" + institute_list = util.load_institute_list() + institute_map = util.get_institute_mapping(institute_list) + + for group_name, group_dict in self.group_info_dict.items(): + group_institute = util.get_institution_from_pi( + institute_map, group_dict[invoice.PREPAY_GROUP_CONTACT_FIELD] + ) + + # Prepay projects are identified by project name, not project - allocation name + row_mask = self.data[invoice.PROJECT_NAME_FIELD].isin( + group_dict[invoice.PREPAY_PROJECT_FIELD] + ) + col_mask = [ + invoice.INVOICE_EMAIL_FIELD, + invoice.GROUP_NAME_FIELD, + invoice.GROUP_INSTITUTION_FIELD, + invoice.GROUP_MANAGED_FIELD, + ] + self.data.loc[row_mask, col_mask] = [ + group_dict[invoice.PREPAY_GROUP_CONTACT_FIELD], + 
group_name, + group_institute, + group_dict[invoice.PREPAY_MANAGED_FIELD], + ] + + def _apply_prepayments(self): + for group_name, group_dict in self.group_info_dict.items(): + group_projects = self.data[ + self.data[invoice.GROUP_NAME_FIELD] == group_name + ] + prepay_amount_used = self.apply_flat_discount( + self.data, + group_projects, + invoice.PI_BALANCE_FIELD, + group_dict[invoice.GROUP_BALANCE_FIELD], + invoice.GROUP_BALANCE_USED_FIELD, + invoice.BALANCE_FIELD, + ) + + remaining_prepay_balance = ( + group_dict[invoice.GROUP_BALANCE_FIELD] - prepay_amount_used + ) + self.data.loc[ + self.data[invoice.GROUP_NAME_FIELD] == group_name, + invoice.GROUP_BALANCE_FIELD, + ] = remaining_prepay_balance + + # If the group has used some prepay money, check if the group + # already has a debit entry for the current month to decide + # whether to append a new debit entry, or overwrite the old one + if prepay_amount_used > 0: + debit_entry_mask = ( + self.prepay_debits[invoice.PREPAY_MONTH_FIELD] == self.invoice_month + ) & (self.prepay_debits[invoice.PREPAY_GROUP_NAME_FIELD] == group_name) + if self.prepay_debits[debit_entry_mask].empty: + self.prepay_debits = pandas.concat( + [ + self.prepay_debits, + pandas.DataFrame( + [[self.invoice_month, group_name, prepay_amount_used]], + columns=self.prepay_debits.columns, + ), + ], + ignore_index=True, + ) + else: + self.prepay_debits.loc[ + debit_entry_mask, invoice.PREPAY_DEBIT_FIELD + ] = prepay_amount_used + + def _backup_s3_prepay_debits(self): + invoice_bucket = util.get_invoice_bucket() + invoice_bucket.upload_file( + self.prepay_debits_filepath, self.PREPAY_DEBITS_S3_BACKUP_FILEPATH + ) + + def _export_prepay_debits(self): + self.prepay_debits.to_csv(self.prepay_debits_filepath, index=False) + + def _export_s3_prepay_debits(self): + invoice_bucket = util.get_invoice_bucket() + invoice_bucket.upload_file( + self.prepay_debits_filepath, self.PREPAY_DEBITS_S3_FILEPATH + ) diff --git a/process_report/tests/unit_tests.py 
b/process_report/tests/unit_tests.py index 6ef99e6..c2a7e7b 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -932,6 +932,431 @@ def test_two_pi(self): self._assert_result_invoice(subsidy_amount, test_invoice, answer_invoice) +class TestPrepaymentProcessor(TestCase): + def _assert_result_invoice( + self, + test_invoice, + test_prepay_credits, + test_prepay_debits_filepath, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month="0000-00", + ): + new_prepayment_proc = test_utils.new_prepayment_processor( + "", + invoice_month, + test_invoice, + test_prepay_credits, + test_prepay_debits_filepath, + test_prepay_projects, + test_prepay_contacts, + ) + new_prepayment_proc.process() + output_invoice = new_prepayment_proc.data + output_prepay_debits = new_prepayment_proc.prepay_debits.sort_values( + by="Month", ignore_index=True + ) + + answer_invoice = answer_invoice.astype(output_invoice.dtypes) + answer_prepay_debits = answer_prepay_debits.astype( + output_prepay_debits.dtypes + ).sort_values(by="Month", ignore_index=True) + + self.assertTrue(output_invoice.equals(answer_invoice)) + self.assertTrue(output_prepay_debits.equals(answer_prepay_debits)) + + def _get_test_invoice(self, project_names, pi_balances, balances=None): + if not balances: + balances = pi_balances + + return pandas.DataFrame( + { + "Project": project_names, + "PI Balance": pi_balances, + "Balance": balances, + "Invoice Email": [None] * len(project_names), + } + ) + + def _get_test_prepay_credits(self, months, group_names, credits): + return pandas.DataFrame( + {"Month": months, "Group Name": group_names, "Credit": credits} + ) + + def _get_test_prepay_debits(self, months, group_names, debits): + return pandas.DataFrame( + {"Month": months, "Group Name": group_names, "Debit": debits} + ) + + def _get_test_prepay_projects( + self, group_names, project_names, start_dates, end_dates + ): + return pandas.DataFrame( 
+ { + "Group Name": group_names, + "Project": project_names, + "Start Date": start_dates, + "End Date": end_dates, + } + ) + + def _get_test_prepay_contacts(self, group_names, emails, is_managed): + return pandas.DataFrame( + { + "Group Name": group_names, + "Group Contact Email": emails, + "MGHPCC Managed": is_managed, + } + ) + + def setUp(self) -> None: + self.test_prepay_debits_file = tempfile.NamedTemporaryFile( + delete=False, mode="w+", suffix=".csv" + ) + + def tearDown(self) -> None: + os.remove(self.test_prepay_debits_file.name) + + def test_one_group_one_project(self): + """Simple one project test and checks idempotentcy""" + invoice_month = "2024-10" + test_invoice = self._get_test_invoice(["P1"], [1000]) + test_prepay_credits = self._get_test_prepay_credits(["2024-01"], ["G1"], [1500]) + test_prepay_debits = self._get_test_prepay_debits([], [], []) + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + test_prepay_projects = self._get_test_prepay_projects( + ["G1"], ["P1"], ["2024-09"], ["2024-12"] + ) + test_prepay_contacts = self._get_test_prepay_contacts( + ["G1"], ["G1@bu.edu"], [True] + ) + + answer_invoice = test_invoice.copy() + answer_invoice["Prepaid Group Name"] = ["G1"] + answer_invoice["Prepaid Group Institution"] = ["Boston University"] + answer_invoice["MGHPCC Managed"] = [True] + answer_invoice["Prepaid Group Balance"] = [500] + answer_invoice["Prepaid Group Used"] = [1000] + answer_invoice["Invoice Email"] = ["G1@bu.edu"] + answer_invoice["PI Balance"] = [0] + answer_invoice["Balance"] = [0] + + answer_prepay_debits = self._get_test_prepay_debits( + [invoice_month], ["G1"], [1000] + ) + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # Is the output invoice and debits the same if + # processor is ran twice with same invoice but updated 
debits? + self._assert_result_invoice( + test_invoice, + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + def test_project_active_periods(self): + """How is prepay handled for 2 projects in same group in different billing months?""" + # Prepay projects not in active period + project_names = ["P1", "P2"] + + invoice_month = "2024-06" + test_invoice = self._get_test_invoice(project_names, [1000, 2000]) + test_prepay_credits = self._get_test_prepay_credits(["2024-04"], ["G1"], [5000]) + test_prepay_debits = self._get_test_prepay_debits([], [], []) + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + test_prepay_projects = self._get_test_prepay_projects( + ["G1", "G1"], project_names, ["2024-08", "2024-10"], ["2024-12", "2025-02"] + ) + test_prepay_contacts = self._get_test_prepay_contacts( + ["G1"], ["G1@bu.edu"], [True] + ) + + answer_invoice = test_invoice.copy() + answer_invoice["Prepaid Group Name"] = [None, None] + answer_invoice["Prepaid Group Institution"] = [None, None] + answer_invoice["MGHPCC Managed"] = [None, None] + answer_invoice["Prepaid Group Balance"] = [None, None] + answer_invoice["Prepaid Group Used"] = [None, None] + + answer_prepay_debits = test_prepay_debits.copy() + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # One project in active period + invoice_month = "2024-08" + answer_invoice["Prepaid Group Name"] = ["G1", None] + answer_invoice["Prepaid Group Institution"] = ["Boston University", None] + answer_invoice["MGHPCC Managed"] = [True, None] + answer_invoice["Prepaid Group Balance"] = [4000, None] + answer_invoice["Prepaid Group Used"] = [1000, None] + answer_invoice["Invoice Email"] = ["G1@bu.edu", None] + 
answer_invoice["PI Balance"] = [0, 2000] + answer_invoice["Balance"] = [0, 2000] + + test_prepay_debits.to_csv( + self.test_prepay_debits_file.name, index=False + ) # Resetting debit file + answer_prepay_debits = self._get_test_prepay_debits( + [invoice_month], ["G1"], [1000] + ) + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # Both projects in active period + invoice_month = "2024-12" + answer_invoice["Prepaid Group Name"] = ["G1", "G1"] + answer_invoice["Prepaid Group Institution"] = [ + "Boston University", + "Boston University", + ] + answer_invoice["MGHPCC Managed"] = [True, True] + answer_invoice["Prepaid Group Balance"] = [2000, 2000] + answer_invoice["Prepaid Group Used"] = [1000, 2000] + answer_invoice["Invoice Email"] = ["G1@bu.edu", "G1@bu.edu"] + answer_invoice["PI Balance"] = [0, 0] + answer_invoice["Balance"] = [0, 0] + + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + answer_prepay_debits = self._get_test_prepay_debits( + [invoice_month], ["G1"], [3000] + ) + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # Both projects in active period, but before credits were given + test_prepay_credits = self._get_test_prepay_credits(["2026-04"], ["G1"], [5000]) + + # Still has group info, but group balance should be 0 + answer_invoice["Prepaid Group Balance"] = [0, 0] + answer_invoice["Prepaid Group Used"] = [None, None] + answer_invoice["PI Balance"] = [1000, 2000] + answer_invoice["Balance"] = [1000, 2000] + + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + answer_prepay_debits = self._get_test_prepay_debits([], [], []) + + 
self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + def test_one_group_two_project_balances(self): + """Different scenarios for 2 projects' balances""" + # Prepayment partially covers projects + project_names = ["P1", "P2"] + + invoice_month = "2024-10" + test_invoice = self._get_test_invoice(project_names, [1000, 2000]) + test_prepay_credits = self._get_test_prepay_credits(["2024-04"], ["G1"], [1500]) + test_prepay_debits = self._get_test_prepay_debits([], [], []) + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + test_prepay_projects = self._get_test_prepay_projects( + ["G1", "G1"], project_names, ["2024-08", "2024-08"], ["2024-10", "2025-02"] + ) + test_prepay_contacts = self._get_test_prepay_contacts( + ["G1"], ["G1@bu.edu"], [True] + ) + + answer_invoice = test_invoice.copy() + answer_invoice["Prepaid Group Name"] = ["G1", "G1"] + answer_invoice["Prepaid Group Institution"] = [ + "Boston University", + "Boston University", + ] + answer_invoice["MGHPCC Managed"] = [True, True] + answer_invoice["Prepaid Group Balance"] = [0, 0] + answer_invoice["Prepaid Group Used"] = [1000, 500] + answer_invoice["Invoice Email"] = ["G1@bu.edu", "G1@bu.edu"] + answer_invoice["PI Balance"] = [0, 1500] + answer_invoice["Balance"] = [0, 1500] + + answer_prepay_debits = self._get_test_prepay_debits( + [invoice_month], ["G1"], [1500] + ) + + self._assert_result_invoice( + test_invoice, + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # PI balance != Balance + test_invoice = self._get_test_invoice(project_names, [1000, 2000], [2000, 2500]) + + answer_invoice["Balance"] = [1000, 2000] + + self._assert_result_invoice( + test_invoice, + test_prepay_credits, + 
self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + def test_two_group_one_project(self): + """How is prepay handled for two different groups with different credits and debits?""" + # Invoice month is before any credits are given + project_names = ["G1P1", "G2P1"] + + invoice_month = "2024-03" + test_invoice = self._get_test_invoice(project_names, [1000, 2000]) + test_prepay_credits = self._get_test_prepay_credits( + ["2024-04", "2024-04", "2024-06", "2024-08", "2024-10"], + ["G1", "G2", "G1", "G2", "G1"], + [700, 800, 1000, 2000, 3500], + ) + test_prepay_debits = self._get_test_prepay_debits( + ["2024-05", "2024-06", "2024-07", "2024-10"], + ["G1", "G2", "G2", "G1"], + [200, 300, 1000, 2000], + ) + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + test_prepay_projects = self._get_test_prepay_projects( + ["G1", "G2"], project_names, ["2024-01", "2024-01"], ["2024-12", "2024-12"] + ) + test_prepay_contacts = self._get_test_prepay_contacts( + ["G1", "G2"], ["G1@bu.edu", "G2@harvard.edu"], [True, False] + ) + + answer_invoice = test_invoice.copy() + answer_invoice["Prepaid Group Name"] = ["G1", "G2"] + answer_invoice["Prepaid Group Institution"] = [ + "Boston University", + "Harvard University", + ] + answer_invoice["MGHPCC Managed"] = [True, False] + answer_invoice["Prepaid Group Balance"] = [0, 0] + answer_invoice["Prepaid Group Used"] = [None, None] + answer_invoice["Invoice Email"] = ["G1@bu.edu", "G2@harvard.edu"] + + answer_prepay_debits = test_prepay_debits.copy() + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # Invoice month is after some credits and debits are given + invoice_month = "2024-08" + answer_invoice["Prepaid Group Balance"] = [500, 0] + 
answer_invoice["Prepaid Group Used"] = [1000, 1500] + answer_invoice["PI Balance"] = [0, 500] + answer_invoice["Balance"] = answer_invoice["PI Balance"] + + answer_prepay_debits = test_prepay_debits.copy() + answer_prepay_debits = pandas.concat( + [ + answer_prepay_debits, + self._get_test_prepay_debits( + ["2024-08", "2024-08"], ["G1", "G2"], [1000, 1500] + ), + ], + axis=0, + ).sort_values("Month", ignore_index=True) + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + # Invoice month after all credits and debits are given. Debit entry should overwritten + invoice_month = "2024-10" + # Reset the debit file as it has been edited from previous test case + test_prepay_debits.to_csv(self.test_prepay_debits_file.name, index=False) + + answer_invoice["Prepaid Group Balance"] = [4000, 0] + answer_invoice["Prepaid Group Used"] = [1000, 1500] + answer_invoice["PI Balance"] = [0, 500] + answer_invoice["Balance"] = answer_invoice["PI Balance"] + + answer_prepay_debits = self._get_test_prepay_debits( + ["2024-05", "2024-06", "2024-07", "2024-10", "2024-10"], + ["G1", "G2", "G2", "G1", "G2"], + [200, 300, 1000, 1000, 1500], + ) + + self._assert_result_invoice( + test_invoice.copy(), + test_prepay_credits, + self.test_prepay_debits_file.name, + test_prepay_projects, + test_prepay_contacts, + answer_invoice, + answer_prepay_debits, + invoice_month, + ) + + class TestLenovoProcessor(TestCase): def test_process_lenovo(self): test_invoice = pandas.DataFrame( diff --git a/process_report/tests/util.py b/process_report/tests/util.py index cfe0bbb..057b9e5 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -13,6 +13,7 @@ validate_billable_pi_processor, new_pi_credit_processor, bu_subsidy_processor, + prepayment_processor, ) @@ -140,3 +141,31 @@ def new_bu_subsidy_processor( return 
bu_subsidy_processor.BUSubsidyProcessor( name, invoice_month, data, subsidy_amount ) + + +def new_prepayment_processor( + name="", + invoice_month="0000-00", + data=None, + prepay_credits=None, + prepay_debits_filepath="", + prepay_projects=None, + prepay_contacts=None, + upload_to_s3=False, +): + if prepay_credits is None: + prepay_credits = pandas.DataFrame() + if prepay_projects is None: + prepay_projects = pandas.DataFrame() + if prepay_contacts is None: + prepay_contacts = pandas.DataFrame() + return prepayment_processor.PrepaymentProcessor( + name, + invoice_month, + data, + prepay_credits, + prepay_projects, + prepay_contacts, + prepay_debits_filepath, + upload_to_s3, + ) diff --git a/process_report/util.py b/process_report/util.py index 41c7419..fd96bbd 100644 --- a/process_report/util.py +++ b/process_report/util.py @@ -34,6 +34,28 @@ def load_institute_list(): return institute_list +def get_institute_mapping(institute_list: list): + institute_map = dict() + for institute_info in institute_list: + for domain in institute_info["domains"]: + institute_map[domain] = institute_info["display_name"] + + return institute_map + + +def get_institution_from_pi(institute_map, pi_uname): + institution_domain = pi_uname.split("@")[-1] + for i in range(institution_domain.count(".") + 1): + if institution_name := institute_map.get(institution_domain, ""): + break + institution_domain = institution_domain[institution_domain.find(".") + 1 :] + + if institution_name == "": + print(f"Warning: PI name {pi_uname} does not match any institution!") + + return institution_name + + def get_iso8601_time(): return datetime.datetime.now().strftime("%Y%m%dT%H%M%SZ")