From 23b3a8ae0100aa5266cbecf1e509a7270a7eb91b Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 4 Dec 2024 11:32:15 -0500 Subject: [PATCH] Invoices no longer assign to `self.data` during filtering step Instead, all invoice classes will now assign to `self.export_data` This removes ambiguity on whether the invoices will modify their internal dataframe and the need to call `copy()` on the processed dataframe --- process_report/invoices/NERC_total_invoice.py | 6 +++--- process_report/invoices/billable_invoice.py | 2 +- process_report/invoices/bu_internal_invoice.py | 8 ++++---- process_report/invoices/invoice.py | 7 ++++--- process_report/invoices/lenovo_invoice.py | 2 +- process_report/invoices/nonbillable_invoice.py | 2 +- process_report/invoices/pi_specific_invoice.py | 4 ++-- process_report/process_report.py | 12 ++++++------ process_report/tests/unit_tests.py | 6 ++++-- 9 files changed, 26 insertions(+), 23 deletions(-) diff --git a/process_report/invoices/NERC_total_invoice.py b/process_report/invoices/NERC_total_invoice.py index 92982da..e39e49b 100644 --- a/process_report/invoices/NERC_total_invoice.py +++ b/process_report/invoices/NERC_total_invoice.py @@ -51,9 +51,9 @@ def output_s3_archive_key(self): return f"Invoices/{self.invoice_month}/Archive/NERC-{self.invoice_month}-Total-Invoice {util.get_iso8601_time()}.csv" def _prepare_export(self): - self.data = self.data[ + self.export_data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] - self.data = self.data[ - self.data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS) + self.export_data = self.export_data[ + self.export_data[invoice.INSTITUTION_FIELD].isin(self.INCLUDED_INSTITUTIONS) ].copy() diff --git a/process_report/invoices/billable_invoice.py b/process_report/invoices/billable_invoice.py index 1761cc8..8bab38a 100644 --- a/process_report/invoices/billable_invoice.py +++ b/process_report/invoices/billable_invoice.py @@ -42,7 +42,7 @@ class BillableInvoice(invoice.Invoice): ] def _prepare_export(self): - self.data = self.data[ + self.export_data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] self.updated_old_pi_df = self.updated_old_pi_df.astype( diff --git a/process_report/invoices/bu_internal_invoice.py b/process_report/invoices/bu_internal_invoice.py index 5bd820c..0db03b9 100644 --- a/process_report/invoices/bu_internal_invoice.py +++ b/process_report/invoices/bu_internal_invoice.py @@ -24,13 +24,13 @@ class BUInternalInvoice(invoice.Invoice): exported_columns_map = {invoice.PI_BALANCE_FIELD: "Balance"} def _prepare_export(self): - self.data = self.data[ + self.export_data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] - self.data = self.data[ - self.data[invoice.INSTITUTION_FIELD] == "Boston University" + self.export_data = self.export_data[ + self.export_data[invoice.INSTITUTION_FIELD] == "Boston University" ] - self.data = self._sum_project_allocations(self.data) + self.export_data = self._sum_project_allocations(self.export_data) def _sum_project_allocations(self, dataframe): """A project may have multiple allocations, and therefore multiple rows diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 7ffb6d8..47797c8 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -49,6 +49,7 @@ class Invoice: name: str invoice_month: str data: pandas.DataFrame + export_data = None def process(self): self._prepare() @@ -93,13 +94,13 @@ def _prepare_export(self): def _filter_columns(self): """Filters and renames columns before exporting""" - return self.data.copy()[self.export_columns_list].rename( + self.export_data = self.export_data[self.export_columns_list].rename( columns=self.exported_columns_map ) def export(self): - export_data = self._filter_columns() - export_data.to_csv(self.output_path, index=False) + self._filter_columns() + self.export_data.to_csv(self.output_path, index=False) def export_s3(self, s3_bucket): s3_bucket.upload_file(self.output_path, self.output_s3_key) diff --git a/process_report/invoices/lenovo_invoice.py b/process_report/invoices/lenovo_invoice.py index 1fbb5c7..fe96973 100644 --- a/process_report/invoices/lenovo_invoice.py +++ b/process_report/invoices/lenovo_invoice.py @@ -19,6 +19,6 @@ class LenovoInvoice(invoice.Invoice): exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"} def _prepare_export(self): - self.data = self.data[ + self.export_data = self.data[ self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) ] diff --git a/process_report/invoices/nonbillable_invoice.py b/process_report/invoices/nonbillable_invoice.py index 578e1d2..350e290 100644 --- a/process_report/invoices/nonbillable_invoice.py +++ b/process_report/invoices/nonbillable_invoice.py @@ -24,4 +24,4 @@ class NonbillableInvoice(invoice.Invoice): ] def _prepare_export(self): - self.data = self.data[~self.data[invoice.IS_BILLABLE_FIELD]] + self.export_data = self.data[~self.data[invoice.IS_BILLABLE_FIELD]] diff --git a/process_report/invoices/pi_specific_invoice.py b/process_report/invoices/pi_specific_invoice.py index dabc0a0..5fc2e1a 100644 --- a/process_report/invoices/pi_specific_invoice.py +++ b/process_report/invoices/pi_specific_invoice.py @@ -34,10 +34,10 @@ class PIInvoice(invoice.Invoice): ] def _prepare(self): - self.data = self.data[ + self.export_data = self.data[ self.data[invoice.IS_BILLABLE_FIELD] & ~self.data[invoice.MISSING_PI_FIELD] ] - self.pi_list = self.data[invoice.PI_FIELD].unique() + self.pi_list = self.export_data[invoice.PI_FIELD].unique() def export(self): def _export_pi_invoice(pi): diff --git a/process_report/process_report.py b/process_report/process_report.py index 49b7644..5a1ed77 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -255,12 +255,12 @@ def main(): lenovo_inv = lenovo_invoice.LenovoInvoice( name=args.Lenovo_file, invoice_month=invoice_month, - data=processed_data.copy(), + data=processed_data, ) nonbillable_inv = nonbillable_invoice.NonbillableInvoice( name=args.nonbillable_file, invoice_month=invoice_month, - data=processed_data.copy(), + data=processed_data, nonbillable_pis=pi, nonbillable_projects=projects, ) @@ -271,7 +271,7 @@ def main(): billable_inv = billable_invoice.BillableInvoice( name=args.output_file, invoice_month=invoice_month, - data=processed_data.copy(), + data=processed_data, old_pi_filepath=old_pi_file, updated_old_pi_df=new_pi_credit_proc.updated_old_pi_df, ) @@ -279,17 +279,17 @@ def main(): nerc_total_inv = NERC_total_invoice.NERCTotalInvoice( name=args.NERC_total_invoice_file, invoice_month=invoice_month, - data=processed_data.copy(), + data=processed_data, ) bu_internal_inv = bu_internal_invoice.BUInternalInvoice( name=args.BU_invoice_file, invoice_month=invoice_month, - data=processed_data.copy(), + data=processed_data, ) pi_inv = pi_specific_invoice.PIInvoice( - name=args.output_folder, invoice_month=invoice_month, data=processed_data.copy() + name=args.output_folder, invoice_month=invoice_month, data=processed_data ) util.process_and_export_invoices( diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 315ecaf..6ef99e6 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -1036,9 +1036,11 @@ class TestBaseInvoice(TestCase): def test_filter_exported_columns(self): test_invoice = pandas.DataFrame(columns=["C1", "C2", "C3", "C4", "C5"]) answer_invoice = pandas.DataFrame(columns=["C1", "C3R", "C5R"]) - inv = test_utils.new_base_invoice(data=test_invoice) + inv = test_utils.new_base_invoice() + inv.export_data = test_invoice inv.export_columns_list = ["C1", "C3", "C5"] inv.exported_columns_map = {"C3": "C3R", "C5": "C5R"} - result_invoice = inv._filter_columns() + inv._filter_columns() + result_invoice = inv.export_data self.assertTrue(result_invoice.equals(answer_invoice))