From 5d1c124ffcc68bdf1b9042200fe82cb2dc5faa41 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Wed, 8 Oct 2025 19:58:55 +0100 Subject: [PATCH 1/2] adjustments to work with sacct file --- gracehpc/cli.py | 30 ++++++++++++++++++++----- gracehpc/core/backend_utils.py | 8 +++++-- gracehpc/core/emissions_calculator.py | 9 ++++---- gracehpc/core/job_log_manager.py | 8 ++++++- gracehpc/interface/cli_script_output.py | 14 +++++++++--- 5 files changed, 54 insertions(+), 15 deletions(-) diff --git a/gracehpc/cli.py b/gracehpc/cli.py index 4f0e88e..9e5e994 100644 --- a/gracehpc/cli.py +++ b/gracehpc/cli.py @@ -108,6 +108,13 @@ def main(): help="Comma-separated list (no spaces) of all the HPC job IDs to filter on. Default: 'all_jobs'", default = "all_jobs") + # If sacct output is already saved to a file, path to the file + run_subcommand.add_argument("--sacct_file", + type=str, + help="Path to a pre-saved sacct output file (as pipe-delimited text). If provided, this file will be used instead of calling sacct directly. Default: None", + default = None) + + # Region argument for carbon intensity data run_subcommand.add_argument("--Region", type=str, @@ -138,6 +145,13 @@ def main(): "'all' : all of the above datasets saved to CSV files. Default: 'no_save'." )) + run_subcommand.add_argument("--allow-multiple-users", + action='store_true', + help=( + "Process jobs from multiple users." + "By default this is false to avoid accidentally processing jobs from multiple users." 
+ )) + # Parse CLI arguments arguments = arg_parser.parse_args() @@ -153,11 +167,17 @@ def main(): # Handle the 'gracehpc run' command elif arguments.command == "run": - try: - confirm_date_args(arguments) # Check if the date arguments are valid - except ValueError as e: - print(f"❌ Date validation error: {e}") - sys.exit(1) # exit the script with an error code + if arguments.sacct_file is None: + try: + confirm_date_args(arguments) # Check if the date arguments are valid + except ValueError as e: + print(f"❌ Date validation error: {e}") + sys.exit(1) # exit the script with an error code + else: + print("Ignoring StartDate and EndDate arguments since a sacct_file has been provided.") + if not os.path.isfile(arguments.sacct_file): + print(f"❌ The provided sacct_file path does not exist or is not a file: {arguments.sacct_file}") + sys.exit(1) # exit the script with an error code # Execute the entire backend (core_engine) by passing the arguments full_df, daily_df, total_df = core_engine(arguments) diff --git a/gracehpc/core/backend_utils.py b/gracehpc/core/backend_utils.py index 2aafaf6..98dc0a4 100644 --- a/gracehpc/core/backend_utils.py +++ b/gracehpc/core/backend_utils.py @@ -151,6 +151,7 @@ def memory_conversion(self, value, unit_label): Args: value (float): The numeric value of memory unit_label (str): the unit associated with the memory value. Must be: + - T (terabytes) - M (megabytes) - G (gigabytes) - K (kilobytes) @@ -159,8 +160,11 @@ def memory_conversion(self, value, unit_label): float: Memory value converted to gigabytes """ # Check unit label is one of the expected - assert unit_label in ['M', 'G', 'K'], f"Invalid unit '{unit_label}. Expected to be either 'M', 'G', 'K']." + assert unit_label in ['T', 'M', 'G', 'K'], f"Invalid unit '{unit_label}. Expected to be either 'T', 'M', 'G', 'K']." 
+ # If unit is terabytes, multiply by 1000 + if unit_label == 'T': + value = value * 1e3 # 1 GB = 0.001 TB # If unit is megabytes, divide by 1000 if unit_label == 'M': value = value / 1e3 # 1 GB = 1000 MB @@ -205,7 +209,7 @@ def requested_memory(self, job_record): total_memory_gb = float(raw_memory_requested[:-2]) * total_cpus # If the memory string ends with a standard unit, parse directly - elif raw_memory_requested[-1] in ['M', 'G', 'K']: + elif raw_memory_requested[-1] in ['T', 'M', 'G', 'K']: memory_unit = raw_memory_requested[-1] # extract unit (last character) total_memory_gb = float(raw_memory_requested[:-1]) diff --git a/gracehpc/core/emissions_calculator.py b/gracehpc/core/emissions_calculator.py index 4b73076..e15172d 100644 --- a/gracehpc/core/emissions_calculator.py +++ b/gracehpc/core/emissions_calculator.py @@ -221,10 +221,11 @@ def get_job_logs(arguments, hpc_config): # Ensure the processed (aggregated) dataframe is also not empty exit_if_no_jobs(JLP.filtered_df, arguments) - # Verify that the final df only contains logs from a single user - if len(set(JLP.final_df.UserName)) > 1: - raise ValueError(f"Multiple users found in the job logs: {set(JLP.final_df.UserName)}. Please ensure you are only processing logs for a single user.") - + if not arguments.allow_multiple_users: + # Verify that the final df only contains logs from a single user + if len(set(JLP.final_df.UserName)) > 1: + raise ValueError(f"Multiple users found in the job logs: {set(JLP.final_df.UserName)}. Please ensure you are only processing logs for a single user or use the --allow-multiple-users flag.") + # Return the final processed/filtered dataframe return JLP.final_df diff --git a/gracehpc/core/job_log_manager.py b/gracehpc/core/job_log_manager.py index 6cfa3e5..f0ddc13 100644 --- a/gracehpc/core/job_log_manager.py +++ b/gracehpc/core/job_log_manager.py @@ -67,7 +67,13 @@ def retrieve_job_logs(self): This method retrieves the accounting logs based on the arguments (e.g. 
start and end dates). The output includes raw job metadata which is parsed and processed later. """ - # Construct the SLURM command with the user arguments and correct formatting + # If the user has provided a sacct file, read from that instead of running sacct command + if self.arguments.sacct_file: + with open(self.arguments.sacct_file, 'rb') as f: + self.sacct_data = f.read() + return + + # Otherwise, construct the SLURM command with the user arguments and correct formatting slurm_command = [ "sacct", "--start", self.arguments.StartDate, diff --git a/gracehpc/interface/cli_script_output.py b/gracehpc/interface/cli_script_output.py index 341868a..8e14565 100644 --- a/gracehpc/interface/cli_script_output.py +++ b/gracehpc/interface/cli_script_output.py @@ -104,9 +104,17 @@ def results_terminal_display(full_df, daily_df, total_df, arguments, hpc_config) return # Extract variables from arguments and data - user_name = full_df.iloc[0].get("UserName", "N/A") - start_date = arguments.StartDate if hasattr(arguments, "StartDate") else "N/A" - end_date = arguments.EndDate if hasattr(arguments, "EndDate") else "N/A" + if arguments.allow_multiple_users: + user_name = "Multiple Users" + else: + user_name = full_df.iloc[0].get("UserName", "N/A") + if arguments.sacct_file is None: + start_date = arguments.StartDate if hasattr(arguments, "StartDate") else "N/A" + end_date = arguments.EndDate if hasattr(arguments, "EndDate") else "N/A" + else: + # See below + start_date = "See below" + end_date = "See below" hpc_name = hpc_config.get('hpc_system', 'Unknown HPC System') # Job ID to filter on From d1fcba013839d1fcc774d8d11b7b1801009a3a0c Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Thu, 9 Oct 2025 15:00:31 +0100 Subject: [PATCH 2/2] Edit default end date --- gracehpc/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gracehpc/cli.py b/gracehpc/cli.py index 9e5e994..7f1640d 100644 --- a/gracehpc/cli.py +++ b/gracehpc/cli.py @@ -90,7 +90,7 @@ def 
main(): # ADD ARGUMENTS TO THE 'run' SUBCOMMAND # --------------------------------------- SD_default = f"{datetime.date.today().year}-01-01" # Set the default start date January 1st of the current year - ED_default = datetime.date.today().strftime("%Y-%m-%d") # Set the default end date to the current date + ED_default = (datetime.date.today() + datetime.timedelta(days=1)).strftime("%Y-%m-%d") # Set the default end date to the current date +1 day to include today's jobs # Date range arguments run_subcommand.add_argument("--StartDate", @@ -99,7 +99,11 @@ def main(): default = SD_default) run_subcommand.add_argument("--EndDate", type=str, - help=f"The final date of the range to process jobs for, in YYYY-MM-DD. Default: {ED_default}", + help=( "The final date of the range to process jobs for, in YYYY-MM-DD. " "Note that this date is exclusive, so jobs submitted on this date will not be included. " f"Default: {ED_default}" ), default = ED_default) # Filtering arguments