From 7812e4a006ed71a36b600b6eeae45c0a82ed5ed1 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 18 Sep 2024 15:28:46 -0400 Subject: [PATCH 1/2] Initialized processor for Lenovo processing --- process_report/processors/lenovo_processor.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 process_report/processors/lenovo_processor.py diff --git a/process_report/processors/lenovo_processor.py b/process_report/processors/lenovo_processor.py new file mode 100644 index 0000000..2a4b162 --- /dev/null +++ b/process_report/processors/lenovo_processor.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + + +from process_report.invoices import invoice +from process_report.processors import processor + + +@dataclass +class LenovoProcessor(processor.Processor): + LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] + SU_CHARGE_MULTIPLIER = 1 + + def _prepare(self): + self.data = self.data[ + self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) + ][ + [ + invoice.INVOICE_DATE_FIELD, + invoice.PROJECT_FIELD, + invoice.INSTITUTION_FIELD, + invoice.SU_HOURS_FIELD, + invoice.SU_TYPE_FIELD, + ] + ].copy() + + self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True) + self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER) + + def _process(self): + self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"] From d0d0d65f806ae8e986efa5383f1dd21ef184d9b8 Mon Sep 17 00:00:00 2001 From: Quan Pham Date: Wed, 18 Sep 2024 15:53:11 -0400 Subject: [PATCH 2/2] Implemented processor for Lenovo processing Note that, for now, only the Lenovo invoice will take the processed data from the `LenovoProcessor`. All other invoices will take the data from `AddInstituteProcessor`. This is due to the processors adding new columns. This odd code design will be removed once invoices gain the feature to filter out their exported columns. --- process_report/invoices/invoice.py | 2 + process_report/invoices/lenovo_invoice.py | 11 +-- process_report/process_report.py | 8 ++- process_report/processors/lenovo_processor.py | 22 ++---- process_report/tests/unit_tests.py | 71 ++++--------------- process_report/tests/util.py | 5 ++ 6 files changed, 34 insertions(+), 85 deletions(-) diff --git a/process_report/invoices/invoice.py b/process_report/invoices/invoice.py index 7a199d4..de2ef48 100644 --- a/process_report/invoices/invoice.py +++ b/process_report/invoices/invoice.py @@ -23,6 +23,8 @@ INSTITUTION_ID_FIELD = "Institution - Specific Code" SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)" SU_TYPE_FIELD = "SU Type" +SU_CHARGE_FIELD = "SU Charge" +LENOVO_CHARGE_FIELD = "Charge" RATE_FIELD = "Rate" COST_FIELD = "Cost" CREDIT_FIELD = "Credit" diff --git a/process_report/invoices/lenovo_invoice.py b/process_report/invoices/lenovo_invoice.py index 80e6786..1fbb5c7 100644 --- a/process_report/invoices/lenovo_invoice.py +++ b/process_report/invoices/lenovo_invoice.py @@ -6,7 +6,6 @@ @dataclass class LenovoInvoice(invoice.Invoice): LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] - SU_CHARGE_MULTIPLIER = 1 export_columns_list = [ invoice.INVOICE_DATE_FIELD, @@ -14,17 +13,11 @@ class LenovoInvoice(invoice.Invoice): invoice.INSTITUTION_FIELD, invoice.SU_HOURS_FIELD, invoice.SU_TYPE_FIELD, - "SU Charge", - "Charge", + invoice.SU_CHARGE_FIELD, + invoice.LENOVO_CHARGE_FIELD, ] exported_columns_map = {invoice.SU_HOURS_FIELD: "SU Hours"} - def _prepare(self): - self.data["SU Charge"] = self.SU_CHARGE_MULTIPLIER - - def _process(self): - self.data["Charge"] = self.data[invoice.SU_HOURS_FIELD] * self.data["SU Charge"] - def _prepare_export(self): self.data = self.data[ self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) diff --git a/process_report/process_report.py b/process_report/process_report.py index be378eb..61463c4 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -18,6 +18,7 @@ from process_report.processors import ( validate_pi_alias_processor, add_institution_processor, + lenovo_processor, ) ### PI file field names @@ -215,7 +216,12 @@ def main(): ) add_institute_proc.process() - preliminary_processed_data = add_institute_proc.data + lenovo_proc = lenovo_processor.LenovoProcessor( + "", invoice_month, add_institute_proc.data + ) + lenovo_proc.process() + + preliminary_processed_data = lenovo_proc.data ### Finish preliminary processing diff --git a/process_report/processors/lenovo_processor.py b/process_report/processors/lenovo_processor.py index 2a4b162..a4bf792 100644 --- a/process_report/processors/lenovo_processor.py +++ b/process_report/processors/lenovo_processor.py @@ -7,24 +7,10 @@ @dataclass class LenovoProcessor(processor.Processor): - LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] SU_CHARGE_MULTIPLIER = 1 - def _prepare(self): - self.data = self.data[ - self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES) - ][ - [ - invoice.INVOICE_DATE_FIELD, - invoice.PROJECT_FIELD, - invoice.INSTITUTION_FIELD, - invoice.SU_HOURS_FIELD, - invoice.SU_TYPE_FIELD, - ] - ].copy() - - self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True) - self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER) - def _process(self): - self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"] + self.data[invoice.SU_CHARGE_FIELD] = self.SU_CHARGE_MULTIPLIER + self.data[invoice.LENOVO_CHARGE_FIELD] = ( + self.data[invoice.SU_HOURS_FIELD] * self.data[invoice.SU_CHARGE_FIELD] + ) diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 858600c..aff8c17 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -8,7 +8,7 @@ from textwrap import dedent from process_report import process_report, util -from process_report.invoices import lenovo_invoice, nonbillable_invoice +from process_report.invoices import nonbillable_invoice from process_report.tests import util as test_utils @@ -735,65 +735,22 @@ def test_validate_billables(self): ) -class TestExportLenovo(TestCase): - def setUp(self): - data = { - "Invoice Month": [ - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - "2023-01", - ], - "Project - Allocation": [ - "ProjectA", - "ProjectB", - "ProjectC", - "ProjectD", - "ProjectE", - "ProjectF", - ], - "Institution": ["A", "B", "C", "D", "E", "F"], - "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], - "SU Type": [ - "OpenShift GPUA100SXM4", - "OpenShift GPUA100", - "OpenShift GPUA100SXM4", - "OpenStack GPUA100SXM4", - "OpenStack CPU", - "OpenStack GPUK80", - ], - } - self.lenovo_invoice = lenovo_invoice.LenovoInvoice( - "Lenovo", "2023-01", pandas.DataFrame(data) - ) - self.lenovo_invoice.process() - +class TestLenovoProcessor(TestCase): def test_process_lenovo(self): - output_df = self.lenovo_invoice.data - self.assertTrue( - set( - [ - process_report.INVOICE_DATE_FIELD, - process_report.PROJECT_FIELD, - process_report.INSTITUTION_FIELD, - process_report.SU_TYPE_FIELD, - process_report.SU_HOURS_FIELD, - "SU Charge", - "Charge", - ] - ).issubset(output_df) + test_invoice = pandas.DataFrame( + { + "SU Hours (GBhr or SUhr)": [1, 10, 100, 4, 432, 10], + } + ) + answer_invoice = test_invoice.copy() + answer_invoice["SU Charge"] = 1 + answer_invoice["Charge"] = ( + answer_invoice["SU Hours (GBhr or SUhr)"] * answer_invoice["SU Charge"] ) - for i, row in output_df.iterrows(): - self.assertIn( - row[process_report.SU_TYPE_FIELD], - ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"], - ) - self.assertEqual( - row["Charge"], row["SU Charge"] * row["SU Hours (GBhr or SUhr)"] - ) + lenovo_proc = test_utils.new_lenovo_processor(data=test_invoice) + lenovo_proc.process() + self.assertTrue(lenovo_proc.data.equals(answer_invoice)) class TestUploadToS3(TestCase): diff --git a/process_report/tests/util.py b/process_report/tests/util.py index 81a27e1..124b051 100644 --- a/process_report/tests/util.py +++ b/process_report/tests/util.py @@ -10,6 +10,7 @@ from process_report.processors import ( add_institution_processor, validate_pi_alias_processor, + lenovo_processor, ) @@ -75,3 +76,7 @@ def new_validate_pi_alias_processor( return validate_pi_alias_processor.ValidatePIAliasProcessor( name, invoice_month, data, alias_map ) + + +def new_lenovo_processor(name="", invoice_month="0000-00", data=pandas.DataFrame()): + return lenovo_processor.LenovoProcessor(name, invoice_month, data)