# --- process_report/invoices/invoice.py ---
from dataclasses import dataclass
import pandas

import process_report.util as util


### Invoice field names
INVOICE_DATE_FIELD = "Invoice Month"
PROJECT_FIELD = "Project - Allocation"
PROJECT_ID_FIELD = "Project - Allocation ID"
PI_FIELD = "Manager (PI)"
INVOICE_EMAIL_FIELD = "Invoice Email"
INVOICE_ADDRESS_FIELD = "Invoice Address"
INSTITUTION_FIELD = "Institution"
INSTITUTION_ID_FIELD = "Institution - Specific Code"
SU_HOURS_FIELD = "SU Hours (GBhr or SUhr)"
SU_TYPE_FIELD = "SU Type"
COST_FIELD = "Cost"
CREDIT_FIELD = "Credit"
CREDIT_CODE_FIELD = "Credit Code"
SUBSIDY_FIELD = "Subsidy"
BALANCE_FIELD = "Balance"
###


@dataclass
class Invoice:
    """Base class for one invoice that is exported as a CSV file.

    ``process()`` runs three hook methods in order (``_prepare``,
    ``_process``, ``_prepare_export``). All three are no-ops here and are
    meant to be overridden by subclasses. ``export()`` writes ``data`` to
    a local CSV file and ``export_s3()`` uploads that file.
    """

    # Stem of the exported file name and of the S3 object keys.
    name: str
    # Invoice month label; embedded verbatim in file names and S3 keys.
    invoice_month: str
    # Rows of the invoice; the hook methods may replace or mutate it.
    data: pandas.DataFrame

    def process(self):
        """Run the prepare, process and prepare-export hooks, in that order."""
        self._prepare()
        self._process()
        self._prepare_export()

    @property
    def output_path(self) -> str:
        """Local file name of the CSV, relative to the working directory."""
        return f"{self.name} {self.invoice_month}.csv"

    @property
    def output_s3_key(self) -> str:
        """S3 key of the current copy of this invoice."""
        # Reuse output_path so the local and remote file names cannot drift.
        return f"Invoices/{self.invoice_month}/{self.output_path}"

    @property
    def output_s3_archive_key(self) -> str:
        """S3 key of the archived copy of this invoice.

        The key embeds ``util.get_iso8601_time()``, which is evaluated on
        every access, so two reads of this property may return different
        keys.
        """
        return f"Invoices/{self.invoice_month}/Archive/{self.name} {self.invoice_month} {util.get_iso8601_time()}.csv"

    def _prepare(self):
        """Prepares the data for processing.

        Implement in subclass if necessary. May add or remove columns
        necessary for processing, add or remove rows, validate the data, or
        perform simple substitutions.
        """
        pass

    def _process(self):
        """Processes the data.

        Implement in subclass if necessary. Performs necessary calculations
        on the data, e.g. applying subsidies or credits.
        """
        pass

    def _prepare_export(self):
        """Prepares the data for export.

        Implement in subclass if necessary. May add or remove columns or rows
        that should or should not be exported after processing.
        """
        pass

    def export(self):
        """Write ``data`` to ``output_path`` as CSV.

        ``DataFrame.to_csv`` is called with its defaults, so the frame's
        index is written as the first column.
        """
        self.data.to_csv(self.output_path)

    def export_s3(self, s3_bucket):
        """Upload the exported CSV under both the current and archive keys.

        The file at ``output_path`` must already exist, i.e. ``export()``
        has to be called first.

        Args:
            s3_bucket: object providing ``upload_file(local_path, key)``
                (presumably a boto3 ``Bucket`` -- confirm against caller).
        """
        s3_bucket.upload_file(self.output_path, self.output_s3_key)
        s3_bucket.upload_file(self.output_path, self.output_s3_archive_key)


# --- process_report/invoices/lenovo_invoice.py ---
from dataclasses import dataclass

import process_report.invoices.invoice as invoice


@dataclass
class LenovoInvoice(invoice.Invoice):
    """Invoice restricted to the SU types listed in ``LENOVO_SU_TYPES``."""

    # Unannotated on purpose: these stay plain class attributes and do not
    # become dataclass fields / constructor arguments.
    LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]
    SU_CHARGE_MULTIPLIER = 1

    def _prepare(self):
        """Keep only Lenovo rows and the columns needed for the invoice.

        Renames the SU-hours column to ``"SU Hours"`` and appends a
        constant ``"SU Charge"`` column holding ``SU_CHARGE_MULTIPLIER``.
        """
        is_lenovo_row = self.data[invoice.SU_TYPE_FIELD].isin(self.LENOVO_SU_TYPES)
        kept_columns = [
            invoice.INVOICE_DATE_FIELD,
            invoice.PROJECT_FIELD,
            invoice.INSTITUTION_FIELD,
            invoice.SU_HOURS_FIELD,
            invoice.SU_TYPE_FIELD,
        ]
        # Copy so the in-place edits below act on an independent frame
        # rather than on a slice of the caller's data.
        self.data = self.data.loc[is_lenovo_row, kept_columns].copy()

        self.data.rename(columns={invoice.SU_HOURS_FIELD: "SU Hours"}, inplace=True)
        self.data.insert(len(self.data.columns), "SU Charge", self.SU_CHARGE_MULTIPLIER)

    def _process(self):
        """Add a ``"Charge"`` column equal to ``SU Hours * SU Charge``."""
        self.data["Charge"] = self.data["SU Hours"] * self.data["SU Charge"]


# --- process_report/invoices/nonbillable_invoice.py ---
from dataclasses import dataclass

import process_report.invoices.invoice as invoice


@dataclass
class NonbillableInvoice(invoice.Invoice):
    """Invoice listing only rows of nonbillable PIs or nonbillable projects."""

    # PI names whose rows are kept in this invoice.
    nonbillable_pis: list[str]
    # Project names whose rows are kept in this invoice.
    nonbillable_projects: list[str]

    def _prepare_export(self):
        """Keep rows whose PI or project is in one of the nonbillable lists."""
        is_nonbillable = self.data[invoice.PI_FIELD].isin(
            self.nonbillable_pis
        ) | self.data[invoice.PROJECT_FIELD].isin(self.nonbillable_projects)
        # Copy so later edits do not operate on a slice of the input frame
        # (same approach as LenovoInvoice._prepare).
        self.data = self.data[is_nonbillable].copy()

    def export(self):
        """Write ``data`` to ``output_path`` as CSV without the index column.

        The ``remove_billables`` function that this class replaces wrote its
        CSV with ``index=False``; the base-class default would add an
        unnamed index column to the nonbillable file.
        """
        self.data.to_csv(self.output_path, index=False)
a/process_report/process_report.py b/process_report/process_report.py index 0a412a3..ffdd6c6 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -9,6 +9,8 @@ import boto3 import pyarrow +from process_report.invoices import lenovo_invoice, nonbillable_invoice + ### Invoice field names INVOICE_DATE_FIELD = "Invoice Month" @@ -150,7 +152,7 @@ def main(): parser.add_argument( "--nonbillable-file", required=False, - default="nonbillable.csv", + default="nonbillable", help="Name of nonbillable file", ) parser.add_argument( @@ -180,7 +182,7 @@ def main(): parser.add_argument( "--Lenovo-file", required=False, - default="Lenovo.csv", + default="Lenovo", help="Name of output csv for Lenovo SU Types invoice", ) parser.add_argument( @@ -225,8 +227,22 @@ def main(): projects = list(set(projects + timed_projects_list)) merged_dataframe = add_institution(merged_dataframe) - export_lenovo(merged_dataframe, args.Lenovo_file) - remove_billables(merged_dataframe, pi, projects, args.nonbillable_file) + lenovo_inv = lenovo_invoice.LenovoInvoice( + name=args.Lenovo_file, invoice_month=invoice_month, data=merged_dataframe.copy() + ) + nonbillable_inv = nonbillable_invoice.NonbillableInvoice( + name=args.nonbillable_file, + invoice_month=invoice_month, + data=merged_dataframe.copy(), + nonbillable_pis=pi, + nonbillable_projects=projects, + ) + for invoice in [lenovo_inv, nonbillable_inv]: + invoice.process() + invoice.export() + if args.upload_to_s3: + bucket = get_invoice_bucket() + invoice.export_s3(bucket) billable_projects = remove_non_billables(merged_dataframe, pi, projects) billable_projects = validate_pi_names(billable_projects) @@ -239,9 +255,7 @@ def main(): if args.upload_to_s3: invoice_list = [ - args.nonbillable_file, args.output_file, - args.Lenovo_file, ] for pi_invoice in os.listdir(args.output_folder): @@ -315,17 +329,6 @@ def remove_non_billables(dataframe, pi, projects): return filtered_dataframe -def remove_billables(dataframe, pi, 
projects, output_file): - """Removes projects and PIs that should be billed from the dataframe - - So this *keeps* the projects/pis that should not be billed. - """ - filtered_dataframe = dataframe[ - dataframe[PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects) - ] - filtered_dataframe.to_csv(output_file, index=False) - - def validate_pi_names(dataframe): invalid_pi_projects = dataframe[pandas.isna(dataframe[PI_FIELD])] for i, row in invalid_pi_projects.iterrows(): @@ -499,26 +502,6 @@ def export_HU_BU(dataframe, output_file): HU_BU_projects.to_csv(output_file) -def export_lenovo(dataframe: pandas.DataFrame, output_file): - LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] - SU_CHARGE_MULTIPLIER = 1 - - lenovo_df = dataframe[dataframe[SU_TYPE_FIELD].isin(LENOVO_SU_TYPES)][ - [ - INVOICE_DATE_FIELD, - PROJECT_FIELD, - INSTITUTION_FIELD, - SU_HOURS_FIELD, - SU_TYPE_FIELD, - ] - ].copy() - - lenovo_df.rename(columns={SU_HOURS_FIELD: "SU Hours"}, inplace=True) - lenovo_df.insert(len(lenovo_df.columns), "SU Charge", SU_CHARGE_MULTIPLIER) - lenovo_df["Charge"] = lenovo_df["SU Hours"] * lenovo_df["SU Charge"] - lenovo_df.to_csv(output_file) - - def upload_to_s3(invoice_list: list, invoice_month): invoice_bucket = get_invoice_bucket() for invoice_filename in invoice_list: diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 1ca797c..ce9bdb8 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -6,6 +6,7 @@ from textwrap import dedent from process_report import process_report +from process_report.invoices import lenovo_invoice, nonbillable_invoice class TestGetInvoiceDate(TestCase): @@ -72,6 +73,9 @@ def setUp(self): self.pi_to_exclude = ["PI2", "PI3"] self.projects_to_exclude = ["ProjectB", "ProjectD"] + self.nonbillable_invoice = nonbillable_invoice.NonbillableInvoice( + "Foo", "Foo", self.dataframe, self.pi_to_exclude, self.projects_to_exclude + ) self.output_file = 
# --- process_report/util.py ---
import datetime
import json
import logging


logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


def get_institution_from_pi(institute_map, pi_uname):
    """Return the institution name mapped to a PI username's domain.

    The lookup key is the text after the last ``"@"`` in ``pi_uname``; a
    username without ``"@"`` is used as the key unchanged.

    Args:
        institute_map: mapping from domain key to institution name.
        pi_uname: PI username.

    Returns:
        The mapped institution name, or ``""`` when the key is not in the
        map (or maps to ``""``); a warning is logged in that case.
    """
    institution_key = pi_uname.split("@")[-1]
    institution_name = institute_map.get(institution_key, "")

    if institution_name == "":
        # Logger.warn was a deprecated alias and is gone in Python 3.13.
        logger.warning("PI name %s does not match any institution!", pi_uname)

    return institution_name


def load_institute_map() -> dict:
    """Load and return the parsed ``process_report/institute_map.json``.

    The path is relative, so it is resolved against the current working
    directory.
    """
    with open("process_report/institute_map.json", "r", encoding="utf-8") as f:
        return json.load(f)


def get_iso8601_time():
    """Return the current UTC time formatted as ``YYYYMMDDTHHMMSSZ``."""
    # The trailing "Z" designates UTC, so the clock must be read in UTC;
    # a naive now() would stamp local time with a UTC marker.
    return datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def compare_invoice_month(month_1, month_2):
    """Returns True if 1st date is later than 2nd date

    Both arguments are parsed with the ``"%Y-%m"`` format; ``strptime``
    raises ``ValueError`` for a string that does not match it.
    """
    dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
    dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
    return dt1 > dt2