diff --git a/.gitignore b/.gitignore
index 16f2dc5..7248b74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
-*.csv
\ No newline at end of file
+*.csv
+__pycache__/
+*.py[cod]
diff --git a/process_report/process_report.py b/process_report/process_report.py
index 647e5b0..0ea7cfa 100644
--- a/process_report/process_report.py
+++ b/process_report/process_report.py
@@ -1,4 +1,5 @@
 import argparse
+import os
 
 import pandas
 
@@ -34,6 +35,12 @@ def main():
         default="filtered_output.csv",
         help="Name of output file",
     )
+    parser.add_argument(
+        "--output-folder",
+        required=False,
+        default="pi_invoices",
+        help="Name of output folder containing pi-specific invoice csvs"
+    )
     args = parser.parse_args()
 
     merged_dataframe = merge_csv(args.csv_files)
@@ -53,8 +60,9 @@ def main():
 
     projects = list(set(projects + timed_projects_list))
 
-    remove_non_billables(merged_dataframe, pi, projects, args.output_file)
+    billable_projects = remove_non_billables(merged_dataframe, pi, projects, args.output_file)
     remove_billables(merged_dataframe, pi, projects, "non_billable.csv")
+    export_pi_billables(billable_projects, args.output_folder)
 
 
 def merge_csv(files):
@@ -96,6 +104,7 @@ def remove_non_billables(dataframe, pi, projects, output_file):
     """Removes projects and PIs that should not be billed from the dataframe"""
     filtered_dataframe = dataframe[~dataframe['Manager (PI)'].isin(pi) & ~dataframe['Project - Allocation'].isin(projects)]
     filtered_dataframe.to_csv(output_file, index=False)
+    return filtered_dataframe
 
 
 def remove_billables(dataframe, pi, projects, output_file):
@@ -106,5 +115,30 @@ def remove_billables(dataframe, pi, projects, output_file):
     filtered_dataframe = dataframe[dataframe['Manager (PI)'].isin(pi) | dataframe['Project - Allocation'].isin(projects)]
     filtered_dataframe.to_csv(output_file, index=False)
 
+
+def export_pi_billables(dataframe: pandas.DataFrame, output_folder):
+    """Write one invoice CSV per PI into output_folder.
+
+    Each file is named <Institution>_<PI>_<Invoice Month>.csv and holds only
+    the rows whose 'Manager (PI)' matches that PI. The invoice month and the
+    institution are taken from the first matching row.
+    """
+    # Also creates missing parent folders and tolerates an existing folder.
+    os.makedirs(output_folder, exist_ok=True)
+    if dataframe.empty:
+        return
+
+    # remove_non_billables() returns a filtered frame that keeps its original
+    # index labels, so label 0 may be gone; pick the first row by position.
+    invoice_month = dataframe['Invoice Month'].iloc[0]
+
+    for pi in dataframe['Manager (PI)'].unique():
+        pi_projects = dataframe[dataframe['Manager (PI)'] == pi]
+        pi_institution = pi_projects['Institution'].iloc[0]
+        invoice_name = f"{pi_institution}_{pi}_{invoice_month}.csv"
+        # index=False matches the other CSV writers in this module.
+        pi_projects.to_csv(os.path.join(output_folder, invoice_name), index=False)
+
+
 if __name__ == "__main__":
     main()
diff --git a/process_report/tests/unit_tests.py b/process_report/tests/unit_tests.py
index 971577f..15768d9 100644
--- a/process_report/tests/unit_tests.py
+++ b/process_report/tests/unit_tests.py
@@ -2,6 +2,7 @@
 import tempfile
 import pandas
 import os
+import shutil
 from textwrap import dedent
 
 from process_report import process_report
@@ -134,3 +135,50 @@ def test_merge_csv(self):
 
         # Assert that the headers in the merged DataFrame match the expected headers
         self.assertListEqual(merged_dataframe.columns.tolist(), self.header)
+
+class TestExportPICSV(TestCase):
+    def setUp(self):
+
+        data = {
+            'Invoice Month': ['2023-01','2023-01','2023-01','2023-01','2023-01'],
+            'Manager (PI)': ['PI1', 'PI1', 'PI1', 'PI2', 'PI2'],
+            'Institution': ['BU', 'BU', 'BU', 'HU', 'HU'],
+            'Project - Allocation': ['ProjectA', 'ProjectB', 'ProjectC', 'ProjectD', 'ProjectE'],
+            'Untouch Data Column': ['DataA', 'DataB', 'DataC', 'DataD', 'DataE']
+        }
+        self.dataframe = pandas.DataFrame(data)
+
+    def test_export_pi(self):
+        output_dir = tempfile.TemporaryDirectory()
+        process_report.export_pi_billables(self.dataframe, output_dir.name)
+
+        pi_csv_1 = f'{self.dataframe["Institution"][0]}_{self.dataframe["Manager (PI)"][0]}_{self.dataframe["Invoice Month"][0]}.csv'
+        pi_csv_2 = f'{self.dataframe["Institution"][3]}_{self.dataframe["Manager (PI)"][3]}_{self.dataframe["Invoice Month"][3]}.csv'
+        self.assertIn(pi_csv_1, os.listdir(output_dir.name))
+        self.assertIn(pi_csv_2, os.listdir(output_dir.name))
+        self.assertEqual(len(os.listdir(output_dir.name)), len(self.dataframe['Manager (PI)'].unique()))
+
+        pi_df = pandas.read_csv(output_dir.name + '/' + pi_csv_1)
+        self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
+        self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][0])
+
+        self.assertIn('ProjectA', pi_df['Project - Allocation'].tolist())
+        self.assertIn('ProjectB', pi_df['Project - Allocation'].tolist())
+        self.assertIn('ProjectC', pi_df['Project - Allocation'].tolist())
+
+        pi_df = pandas.read_csv(output_dir.name + '/' + pi_csv_2)
+        self.assertEqual(len(pi_df['Manager (PI)'].unique()), 1)
+        self.assertEqual(pi_df['Manager (PI)'].unique()[0], self.dataframe['Manager (PI)'][3])
+
+        self.assertIn('ProjectD', pi_df['Project - Allocation'].tolist())
+        self.assertIn('ProjectE', pi_df['Project - Allocation'].tolist())
+        self.assertNotIn('ProjectA', pi_df['Project - Allocation'].tolist())
+        self.assertNotIn('ProjectB', pi_df['Project - Allocation'].tolist())
+        self.assertNotIn('ProjectC', pi_df['Project - Allocation'].tolist())
+
+    def test_export_pi_filtered_index(self):
+        # A filtered frame keeps its original labels, so label 0 is absent here
+        filtered_dataframe = self.dataframe[self.dataframe['Manager (PI)'] == 'PI2']
+        with tempfile.TemporaryDirectory() as output_dir:
+            process_report.export_pi_billables(filtered_dataframe, output_dir)
+            self.assertListEqual(os.listdir(output_dir), ['HU_PI2_2023-01.csv'])