Merge pull request #10 from QuanMPhm/7/pi_csv

Added ability to group invoices by PIs
CCI-MOC · Apr 1, 2024 · 1823ed2 · 1823ed2
2 parents bc176a2 + 54dc9fd
commit 1823ed2
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
-*.csv
+*.csv
+__pycache__/
+*.py[cod]
diff --git a/process_report/process_report.py b/process_report/process_report.py
@@ -1,4 +1,5 @@
 import argparse
+import os
 
 import pandas
 
@@ -34,6 +35,12 @@ def main():
         default="filtered_output.csv",
         help="Name of output file",
     )
+    parser.add_argument(
+        "--output-folder",
+        required=False,
+        default="pi_invoices",
+        help="Name of output folder containing pi-specific invoice csvs"
+    )
     args = parser.parse_args()
     merged_dataframe = merge_csv(args.csv_files)
 
@@ -53,8 +60,9 @@ def main():
 
     projects = list(set(projects + timed_projects_list))
 
-    remove_non_billables(merged_dataframe, pi, projects, args.output_file)
+    billable_projects = remove_non_billables(merged_dataframe, pi, projects, args.output_file)
     remove_billables(merged_dataframe, pi, projects, "non_billable.csv")
+    export_pi_billables(billable_projects, args.output_folder)
 
 
 def merge_csv(files):
@@ -96,6 +104,7 @@ def remove_non_billables(dataframe, pi, projects, output_file):
     """Removes projects and PIs that should not be billed from the dataframe"""
     filtered_dataframe = dataframe[~dataframe['Manager (PI)'].isin(pi) & ~dataframe['Project - Allocation'].isin(projects)]
     filtered_dataframe.to_csv(output_file, index=False)
+    return filtered_dataframe
 
 
 def remove_billables(dataframe, pi, projects, output_file):
@@ -106,5 +115,18 @@ def remove_billables(dataframe, pi, projects, output_file):
     filtered_dataframe = dataframe[dataframe['Manager (PI)'].isin(pi) | dataframe['Project - Allocation'].isin(projects)]
     filtered_dataframe.to_csv(output_file, index=False)
 
+def export_pi_billables(dataframe: pandas.DataFrame, output_folder):
+    """Write one invoice CSV per PI into ``output_folder``.
+
+    Rows are grouped by the 'Manager (PI)' column and every group is saved as
+    ``<Institution>_<PI>_<Invoice Month>.csv``. The institution comes from the
+    PI's first row and the invoice month from the first row of ``dataframe``.
+    The folder (including missing parents) is created when it does not exist.
+    Rows whose 'Manager (PI)' value is missing are not exported, because
+    pandas' groupby drops NaN keys by default.
+
+    Args:
+        dataframe: Rows to export; must contain the 'Invoice Month',
+            'Manager (PI)' and 'Institution' columns.
+        output_folder: Path of the directory that receives the CSV files.
+    """
+    os.makedirs(output_folder, exist_ok=True)
+
+    # Nothing to export; this also avoids indexing into an empty column below.
+    if dataframe.empty:
+        return
+
+    # Positional access: a filtered frame keeps its original index labels, so
+    # a label lookup of ``[0]`` raises KeyError once row 0 has been removed.
+    invoice_month = dataframe['Invoice Month'].iat[0]
+
+    # sort=False keeps the PIs in order of first appearance.
+    for pi, pi_projects in dataframe.groupby('Manager (PI)', sort=False):
+        pi_institution = pi_projects['Institution'].iat[0]
+        invoice_name = f"{pi_institution}_{pi}_{invoice_month}.csv"
+        # index=False matches the other exports in this module and keeps the
+        # pandas row index out of the invoice.
+        pi_projects.to_csv(os.path.join(output_folder, invoice_name), index=False)
+
+
 if __name__ == "__main__":
     main()
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
@@ -134,3 +134,44 @@ def test_merge_csv(self):
 
         # Assert that the headers in the merged DataFrame match the expected headers
         self.assertListEqual(merged_dataframe.columns.tolist(), self.header)
+
+
+class TestExportPICSV(TestCase):
+    """Checks that export_pi_billables writes one correctly named CSV per PI."""
+
+    def setUp(self):
+        # Two PIs from different institutions, all rows in one invoice month.
+        data = {
+            'Invoice Month': ['2023-01','2023-01','2023-01','2023-01','2023-01'],
+            'Manager (PI)': ['PI1', 'PI1', 'PI1', 'PI2', 'PI2'],
+            'Institution': ['BU', 'BU', 'BU', 'HU', 'HU'],
+            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE'],
+            'Untouch Data Column': ['DataA', 'DataB', 'DataC', 'DataD', 'DataE']
+        }
+        self.dataframe = pandas.DataFrame(data)
+
+    def test_export_pi(self):
+        """Each PI gets its own file containing only that PI's projects."""
+        # Expected names follow <Institution>_<PI>_<Invoice Month>.csv; rows 0
+        # and 3 are the first rows of PI1 and PI2 respectively.
+        pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]}_{self.dataframe["Invoice Month"][0]}.csv'
+        pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]}_{self.dataframe["Invoice Month"][3]}.csv'
+
+        # The context manager removes the temporary folder even when an
+        # assertion below fails.
+        with tempfile.TemporaryDirectory() as output_dir:
+            process_report.export_pi_billables(self.dataframe, output_dir)
+
+            exported_files = os.listdir(output_dir)
+            self.assertIn(pi_csv_1, exported_files)
+            self.assertIn(pi_csv_2, exported_files)
+            self.assertEqual(len(exported_files), len(self.dataframe['Manager (PI)'].unique()))
+
+            pi_df = pandas.read_csv(os.path.join(output_dir, pi_csv_1))
+            self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
+            self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][0])
+
+            pi_1_projects = pi_df['Project - Allocation'].tolist()
+            self.assertIn('ProjectA', pi_1_projects)
+            self.assertIn('ProjectB', pi_1_projects)
+            self.assertIn('ProjectC', pi_1_projects)
+
+            pi_df = pandas.read_csv(os.path.join(output_dir, pi_csv_2))
+            self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
+            self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][3])
+
+            pi_2_projects = pi_df['Project - Allocation'].tolist()
+            self.assertIn('ProjectD', pi_2_projects)
+            self.assertIn('ProjectE', pi_2_projects)
+            self.assertNotIn('ProjectA', pi_2_projects)
+            self.assertNotIn('ProjectB', pi_2_projects)
+            self.assertNotIn('ProjectC', pi_2_projects)