diff --git a/process_report/process_report.py b/process_report/process_report.py index b2d2dd9..17e5e86 100644 --- a/process_report/process_report.py +++ b/process_report/process_report.py @@ -1,9 +1,17 @@ import argparse import os import sys +import datetime import json import pandas +import boto3 +from botocore.config import Config + + +EXPORT_DATAFRAME = "dataframe" +EXPORT_LOCAL_PATH = "local_path" +EXPORT_S3_PATHS = "s3_paths" ### Invoice field names @@ -35,7 +43,7 @@ def get_institution_from_pi(institute_map, pi_uname): def load_institute_map() -> dict: - with open("institute_map.json", "r") as f: + with open("process_report/institute_map.json", "r") as f: institute_map = json.load(f) return institute_map @@ -62,6 +70,23 @@ def is_old_pi(old_pi_dict, pi, invoice_month): return False +def get_invoice_bucket(): + b2_resource = boto3.resource( + service_name="s3", + endpoint_url=os.environ["B2_ENDPOINT"], + aws_access_key_id=os.environ["B2_KEY_ID"], + aws_secret_access_key=os.environ["B2_APP_KEY"], + config=Config( + signature_version="s3v4", + ), + ) + return b2_resource.Bucket(os.environ["B2_BUCKET_NAME"]) + + +def get_iso8601_time(): + return datetime.datetime.now().strftime("%Y%m%dT%H%M%SZ") + + def main(): """Remove non-billable PIs and projects""" @@ -72,6 +97,12 @@ def main(): nargs="+", help="One or more CSV files that need to be processed", ) + parser.add_argument("--upload-to-s3", action="store_true") + parser.add_argument( + "--invoice-month", + required=True, + help="Invoice month to process", + ) parser.add_argument( "--pi-file", required=True, @@ -117,7 +148,15 @@ def main(): help="Name of csv file listing previously billed PIs", ) args = parser.parse_args() - merged_dataframe = merge_csv(args.csv_files) + + invoice_month = args.invoice_month + + if args.upload_to_s3: + csv_files = fetch_S3_invoices(invoice_month) + else: + csv_files = args.csv_files + + merged_dataframe = merge_csv(csv_files) pi = [] projects = [] @@ -126,26 +165,49 @@ def main(): with open(args.projects_file) as file: projects = [line.rstrip() for line in file] - invoice_date = get_invoice_date(merged_dataframe) - print("Invoice date: " + str(invoice_date)) + print("Invoice date: " + str(invoice_month)) - timed_projects_list = timed_projects(args.timed_projects_file, invoice_date) + timed_projects_list = timed_projects(args.timed_projects_file, invoice_month) print("The following timed-projects will not be billed for this period: ") print(timed_projects_list) projects = list(set(projects + timed_projects_list)) + invoice_list = list() + merged_dataframe = add_institution(merged_dataframe) - remove_billables(merged_dataframe, pi, projects, "non_billable.csv") + invoice_list.append( + remove_billables( + merged_dataframe, pi, projects, "nonbillable.csv", invoice_month + ) + ) billable_projects = remove_non_billables(merged_dataframe, pi, projects) billable_projects = validate_pi_names(billable_projects) credited_projects = apply_credits_new_pi(billable_projects, args.old_pi_file) - export_billables(credited_projects, args.output_file) - export_pi_billables(billable_projects, args.output_folder) - export_HU_only(billable_projects, args.HU_invoice_file) - export_HU_BU(billable_projects, args.HU_BU_invoice_file) - export_lenovo(billable_projects) + + invoice_list.append( + export_billables(credited_projects, args.output_file, invoice_month) + ) + export_pi_billables(credited_projects, args.output_folder, invoice_month) + export_HU_only(credited_projects, args.HU_invoice_file, invoice_month) + export_HU_BU(credited_projects, args.HU_BU_invoice_file, invoice_month) + export_lenovo(credited_projects, invoice_month) + export_invoices(invoice_list, args.upload_to_s3) + + +def fetch_S3_invoices(invoice_month): + """Fetches usage invoices from S3 given invoice month""" + s3_invoice_list = list() + invoice_bucket = get_invoice_bucket() + for obj in invoice_bucket.objects.filter( + Prefix=f"Invoices/{invoice_month}/Service Invoices/" + ): + local_name = obj.key.split("/")[-1] + s3_invoice_list.append(local_name) + invoice_bucket.download_file(obj.key, local_name) + + return s3_invoice_list def merge_csv(files): @@ -195,7 +257,7 @@ def remove_non_billables(dataframe, pi, projects): return filtered_dataframe -def remove_billables(dataframe, pi, projects, output_file): +def remove_billables(dataframe, pi, projects, output_file, invoice_month): """Removes projects and PIs that should be billed from the dataframe So this *keeps* the projects/pis that should not be billed. @@ -203,7 +265,17 @@ def remove_billables(dataframe, pi, projects, output_file): filtered_dataframe = dataframe[ dataframe[PI_FIELD].isin(pi) | dataframe[PROJECT_FIELD].isin(projects) ] - filtered_dataframe.to_csv(output_file, index=False) + + invoice_b2_path = ( + f"Invoices/{invoice_month}/NERC (Non-Billable) {invoice_month}.csv" + ) + invoice_b2_path_archive = f"Invoices/{invoice_month}/Archive/NERC (Non-Billable) {invoice_month} {get_iso8601_time()}.csv" + + return { + EXPORT_DATAFRAME: filtered_dataframe, + EXPORT_LOCAL_PATH: output_file, + EXPORT_S3_PATHS: [invoice_b2_path, invoice_b2_path_archive], + } def validate_pi_names(dataframe): @@ -215,27 +287,6 @@ def validate_pi_names(dataframe): return dataframe -def export_billables(dataframe, output_file): - dataframe.to_csv(output_file, index=False) - - -def export_pi_billables(dataframe: pandas.DataFrame, output_folder): - if not os.path.exists(output_folder): - os.mkdir(output_folder) - - invoice_month = dataframe[INVOICE_DATE_FIELD].iat[0] - pi_list = dataframe[PI_FIELD].unique() - - for pi in pi_list: - if pandas.isna(pi): - continue - pi_projects = dataframe[dataframe[PI_FIELD] == pi] - pi_instituition = pi_projects[INSTITUTION_FIELD].iat[0] - pi_projects.to_csv( - output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv" - ) - - def apply_credits_new_pi(dataframe, old_pi_file): new_pi_credit_code = "0002" new_pi_credit_amount = 1000 @@ -297,23 +348,54 @@ def add_institution(dataframe: pandas.DataFrame): return dataframe -def export_HU_only(dataframe, output_file): +def export_billables(dataframe, output_file, invoice_month): + invoice_b2_path = f"Invoices/{invoice_month}/NERC {invoice_month}.csv" + invoice_b2_path_archive = ( + f"Invoices/{invoice_month}/Archive/" + + f"NERC {invoice_month} {get_iso8601_time()}.csv" + ) + + return { + EXPORT_DATAFRAME: dataframe, + EXPORT_LOCAL_PATH: output_file, + EXPORT_S3_PATHS: [invoice_b2_path, invoice_b2_path_archive], + } + + +def export_pi_billables(dataframe: pandas.DataFrame, output_folder, invoice_month): + if not os.path.exists(output_folder): + os.mkdir(output_folder) + + pi_list = dataframe[PI_FIELD].unique() + + for pi in pi_list: + if pandas.isna(pi): + continue + pi_projects = dataframe[dataframe[PI_FIELD] == pi] + pi_instituition = pi_projects[INSTITUTION_FIELD].iat[0] + pi_projects.to_csv( + output_folder + f"/{pi_instituition}_{pi}_{invoice_month}.csv" + ) + # TODO (Quan Pham) Where to place these + + +def export_HU_only(dataframe, output_file, invoice_month): HU_projects = dataframe[dataframe[INSTITUTION_FIELD] == "Harvard University"] HU_projects.to_csv(output_file) + # TODO (Quan Pham) Where to place these -def export_HU_BU(dataframe, output_file): +def export_HU_BU(dataframe, output_file, invoice_month): HU_BU_projects = dataframe[ (dataframe[INSTITUTION_FIELD] == "Harvard University") | (dataframe[INSTITUTION_FIELD] == "Boston University") ] HU_BU_projects.to_csv(output_file) + # TODO (Quan Pham) Where to place these -def export_lenovo(dataframe: pandas.DataFrame, output_file=None): - lenovo_file_name = ( - output_file or f"Lenovo_{dataframe[INVOICE_DATE_FIELD].iat[0]}.csv" - ) +def export_lenovo(dataframe: pandas.DataFrame, invoice_month, output_file=None): + lenovo_file_name = output_file or f"Lenovo_{invoice_month}.csv" LENOVO_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"] SU_CHARGE_MULTIPLIER = 1 @@ -332,6 +414,16 @@ def export_lenovo(dataframe: pandas.DataFrame, output_file=None): lenovo_df.insert(len(lenovo_df.columns), "SU Charge", SU_CHARGE_MULTIPLIER) lenovo_df["Charge"] = lenovo_df["SU Hours"] * lenovo_df["SU Charge"] lenovo_df.to_csv(lenovo_file_name) + # TODO (Quan Pham) Where to place these + + +def export_invoices(invoice_list: list, upload_to_s3): + for invoice in invoice_list: + invoice[EXPORT_DATAFRAME].to_csv(invoice[EXPORT_LOCAL_PATH]) + if upload_to_s3: + invoice_bucket = get_invoice_bucket() + for s3_path in invoice[EXPORT_S3_PATHS]: + invoice_bucket.upload_file(invoice[EXPORT_LOCAL_PATH], s3_path) if __name__ == "__main__": diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py index 8f0c381..89ce851 100644 --- a/process_report/tests/unit_tests.py +++ b/process_report/tests/unit_tests.py @@ -69,24 +69,17 @@ def setUp(self): "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"], } self.dataframe = pandas.DataFrame(data) - + self.invoice_month = "2024-03" self.pi_to_exclude = ["PI2", "PI3"] self.projects_to_exclude = ["ProjectB", "ProjectD"] - self.output_file = tempfile.NamedTemporaryFile(delete=False) - self.output_file2 = tempfile.NamedTemporaryFile(delete=False) - - def tearDown(self): - os.remove(self.output_file.name) - os.remove(self.output_file2.name) - def test_remove_non_billables(self): billables_df = process_report.remove_non_billables( self.dataframe, self.pi_to_exclude, self.projects_to_exclude ) - process_report.export_billables(billables_df, self.output_file.name) - - result_df = pandas.read_csv(self.output_file.name) + result_df = process_report.export_billables( + billables_df, "fake_invoice", self.invoice_month + )["dataframe"] self.assertNotIn("PI2", result_df["Manager (PI)"].tolist()) self.assertNotIn("PI3", result_df["Manager (PI)"].tolist()) @@ -105,14 +98,13 @@ def test_remove_non_billables(self): self.assertIn("ProjectE", result_df["Project - Allocation"].tolist()) def test_remove_billables(self): - process_report.remove_billables( + result_df = process_report.remove_billables( self.dataframe, self.pi_to_exclude, self.projects_to_exclude, - self.output_file2.name, - ) - - result_df = pandas.read_csv(self.output_file2.name) + "fake_invoice", + self.invoice_month, + )["dataframe"] self.assertIn("PI2", result_df["Manager (PI)"].tolist()) self.assertIn("PI3", result_df["Manager (PI)"].tolist()) @@ -181,10 +173,13 @@ def setUp(self): "Untouch Data Column": ["DataA", "DataB", "DataC", "DataD", "DataE"], } self.dataframe = pandas.DataFrame(data) + self.invoice_month = data["Invoice Month"][0] def test_export_pi(self): output_dir = tempfile.TemporaryDirectory() - process_report.export_pi_billables(self.dataframe, output_dir.name) + process_report.export_pi_billables( + self.dataframe, output_dir.name, self.invoice_month + ) pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]}_{self.dataframe["Invoice Month"][0]}.csv' pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]}_{self.dataframe["Invoice Month"][3]}.csv' @@ -381,6 +376,7 @@ def setUp(self): ], } self.dataframe = pandas.DataFrame(data) + self.invoice_month = "2024-03" output_file = tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".csv") self.output_file = output_file.name @@ -389,7 +385,9 @@ def tearDown(self): os.remove(self.output_file) def test_apply_credit_0002(self): - process_report.export_lenovo(self.dataframe, self.output_file) + process_report.export_lenovo( + self.dataframe, self.invoice_month, self.output_file + ) output_df = pandas.read_csv(self.output_file) self.assertTrue( diff --git a/requirements.txt b/requirements.txt index fb6c7ed..b650973 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ pandas +boto3 diff --git a/tools/clone_nonbillables_and_process.sh b/tools/clone_nonbillables_and_process.sh deleted file mode 100644 index bcc77e2..0000000 --- a/tools/clone_nonbillables_and_process.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -set -xe - -# Add deploy key to ssh config -mkdir -p ~/.ssh -if [ ! -e ~/.ssh/config ]; then - touch ~/.ssh/config - touch ~/.ssh/id_nonbillable - echo " - Host github-nonbillable - HostName github.com - IdentityFile ~/.ssh/id_nonbillable - " > ~/.ssh/config - echo "$GH_NONBILLABLE_DEPLOYKEY" > ~/.ssh/id_nonbillable - chmod 600 ~/.ssh/id_nonbillable -fi - -if [ ! -d ./non-billable-projects ]; then - git clone git@github-nonbillable:CCI-MOC/non-billable-projects.git ./non-billable-projects -fi diff --git a/tools/clone_nonbillables_and_process_invoice.sh b/tools/clone_nonbillables_and_process_invoice.sh new file mode 100644 index 0000000..75503b5 --- /dev/null +++ b/tools/clone_nonbillables_and_process_invoice.sh @@ -0,0 +1,40 @@ +#!/bin/sh +set -xe + +# Add deploy key to ssh config +mkdir -p ~/.ssh +if [ ! -e ~/.ssh/config ]; then + touch ~/.ssh/config + touch ~/.ssh/id_nonbillable + echo " + Host github-nonbillable + HostName github.com + IdentityFile ~/.ssh/id_nonbillable + " > ~/.ssh/config + echo "$GH_NONBILLABLE_DEPLOYKEY" > ~/.ssh/id_nonbillable + chmod 600 ~/.ssh/id_nonbillable +fi + +if [ ! -d ~/.ssh/known_hosts ]; then + touch ~/.ssh/known_hosts + echo "github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl + github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg= + github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk= + " >> ~/.ssh/known_hosts +fi + +if [ ! -d ./non-billable-projects ]; then + git clone git@github-nonbillable:CCI-MOC/non-billable-projects.git ./non-billable-projects +fi + +INVOICE_MONTH=$(date --date="$(date +%Y-%m-01) -1 month" +%Y-%m) +export B2_ENDPOINT +export B2_APP_KEY +export B2_KEY_ID +export B2_BUCKET_NAME +python process_report/process_report.py \ + --invoice-month $INVOICE_MONTH \ + --pi-file ./non-billable-projects/pi.txt \ + --projects-file ./non-billable-projects/projects.txt \ + --timed-projects-file ./non-billable-projects/timed_projects.txt \ + --old-pi-file old_pi.csv