diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index a7193a6f0..3da807a5b 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -66,7 +66,7 @@ def add_common_arguments(p, mask = None):
             p.add_argument(f"--no-{target.name}", action="store_true", help=f"Do not build the {target.name} dependency. Use the system's instead.")
 
     if "g" not in mask:
-        p.add_argument("-g", "--gpus", nargs="+", type=int, default=[0], help="(GPU) List of GPU #s to use.")
+        p.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="(Optional GPU override) List of GPU #s to use (environment default if unspecified).")
 
 # === BUILD ===
 add_common_arguments(build, "g")
diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py
index b8c45f772..b6d2f28ec 100644
--- a/toolchain/mfc/run/engines.py
+++ b/toolchain/mfc/run/engines.py
@@ -147,14 +147,16 @@ def run(self, targets: typing.List[MFCTarget]) -> None:
 
         if not ARG("dry_run"):
             start_time = time.monotonic()
+
+            env = os.environ.copy()
+            if ARG('gpus') is not None:
+                env['CUDA_VISIBLE_DEVICES'] = ','.join([str(_) for _ in ARG('gpus')])
+
             system(
                 self.get_exec_cmd(target), cwd=self.input.case_dirpath,
-                env={
-                    **os.environ.copy(),
-                    'CUDA_VISIBLE_DEVICES': ','.join([str(_) for _ in ARG('gpus')])
-                }
+                env=env
             )
-            end_time = time.monotonic()
+            end_time = time.monotonic()
 
         cons.print(no_indent=True)
         cons.print(f"[bold green]Done[/bold green] (in {datetime.timedelta(seconds=end_time - start_time)})")
@@ -307,11 +309,11 @@ def __create_batch_file(self, system: queues.QueueSystem, targets: typing.List[M
         cons.print("> Writing batch file...")
         file_write(filepath, content)
 
-    def __execute_batch_file(self, system: queues.QueueSystem):
+    def __execute_batch_file(self, queue: queues.QueueSystem):
        # We CD to the case directory before executing the batch file so that
        # any files the queue system generates (like .err and .out) are created
        # in the correct directory.
-        cmd = system.gen_submit_cmd(self.__get_batch_filename())
+        cmd = queue.gen_submit_cmd(self.__get_batch_filename())
 
         if system(cmd, cwd=self.__get_batch_dirpath()) != 0:
-            raise MFCException(f"Submitting batch file for {system.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.")
+            raise MFCException(f"Submitting batch file for {queue.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.")
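Net effect of the args.py and engines.py hunks above: CUDA_VISIBLE_DEVICES is forced only when the user passes --gpus; otherwise the child process inherits the caller's environment untouched. A minimal sketch of that pattern, assuming only the standard library (run_with_gpu_override is a hypothetical helper, not toolchain code):

    import os
    import subprocess
    from typing import Optional, Sequence

    def run_with_gpu_override(cmd: str, gpus: Optional[Sequence[int]] = None) -> int:
        # Copy the caller's environment so an already exported
        # CUDA_VISIBLE_DEVICES survives when no --gpus override is given.
        env = os.environ.copy()
        if gpus is not None:
            # Explicit override: restrict device visibility for this child only.
            env["CUDA_VISIBLE_DEVICES"] = ",".join(str(g) for g in gpus)
        return subprocess.run(cmd, shell=True, env=env, check=False).returncode

Copying os.environ rather than mutating it keeps the override scoped to the spawned run, which matches the behavior the engines.py hunk introduces.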
diff --git a/toolchain/mfc/sched.py b/toolchain/mfc/sched.py
index 71ad990f7..f1258ddab 100644
--- a/toolchain/mfc/sched.py
+++ b/toolchain/mfc/sched.py
@@ -35,11 +35,11 @@ class Task:
     load: float
 
 
-def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Set[int]) -> None:
+def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Optional[typing.Set[int]] = None) -> None:
     nAvailable: int = nThreads
     threads: typing.List[WorkerThreadHolder] = []
 
-    sched.LOAD = { id: 0.0 for id in devices }
+    sched.LOAD = { id: 0.0 for id in devices or [] }
 
     def join_first_dead_thread(progress, complete_tracker) -> None:
         nonlocal threads, nAvailable
@@ -50,7 +50,7 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
                 raise threadHolder.thread.exc
 
             nAvailable += threadHolder.ppn
-            for device in threadHolder.devices:
+            for device in threadHolder.devices or set():
                 sched.LOAD[device] -= threadHolder.load / threadHolder.ppn
 
             progress.advance(complete_tracker)
@@ -82,18 +82,21 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
 
             # Launch Thread
             progress.advance(queue_tracker)
 
+            use_devices = None
             # Use the least loaded devices
-            devices = set()
-            for _ in range(task.ppn):
-                device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
-                sched.LOAD[device] += task.load / task.ppn
+            if devices is not None:
+                use_devices = set()
+                for _ in range(task.ppn):
+                    device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
+                    sched.LOAD[device] += task.load / task.ppn
+                    use_devices.add(device)
 
             nAvailable -= task.ppn
-            thread = WorkerThread(target=task.func, args=tuple(task.args) + (devices,))
+            thread = WorkerThread(target=task.func, args=tuple(task.args) + (use_devices,))
             thread.start()
 
-            threads.append(WorkerThreadHolder(thread, task.ppn, task.load, devices))
+            threads.append(WorkerThreadHolder(thread, task.ppn, task.load, use_devices))
 
 
     # Wait for the last tests to complete
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index d83927492..6c0af864b 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -100,7 +100,11 @@ def __init__(self, trace: str, mods: dict, ppn: int = None) -> None:
         super().__init__({**BASE_CFG.copy(), **mods})
 
     def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.CompletedProcess:
-        gpu_select = f"CUDA_VISIBLE_DEVICES={','.join([str(_) for _ in gpus])}"
+        if gpus is not None and len(gpus) != 0:
+            gpus_select = f"--gpus {' '.join([str(_) for _ in gpus])}"
+        else:
+            gpus_select = ""
+
         filepath = f'"{self.get_dirpath()}/case.py"'
         tasks = f"-n {self.ppn}"
         jobs = f"-j {ARG('jobs')}" if ARG("case_optimization") else ""
@@ -110,8 +114,9 @@ def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.Co
         mfc_script = ".\mfc.bat" if os.name == 'nt' else "./mfc.sh"
 
         command: str = f'''\
-            {gpu_select} {mfc_script} run {filepath} {tasks} {binary_option} \
-            {case_optimization} {jobs} -t {' '.join(targets)} 2>&1\
+            {mfc_script} run {filepath} {tasks} {binary_option} \
+            {case_optimization} {jobs} -t {' '.join(targets)} \
+            {gpus_select} 2>&1\
         '''
 
         return subprocess.run(command, stdout=subprocess.PIPE,
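The case.py hunks above swap the old CUDA_VISIBLE_DEVICES= command prefix for the new --gpus flag, which is appended only when a non-empty device set is requested. A small sketch of that flag construction (build_run_command is hypothetical, not part of case.py):

    import shlex
    from typing import Optional, Set

    def build_run_command(mfc_script: str, case_path: str, ppn: int,
                          gpus: Optional[Set[int]] = None) -> str:
        # Append --gpus only for a non-empty device set; plain runs keep
        # inheriting whatever the caller's environment already provides.
        gpus_select = f"--gpus {' '.join(str(g) for g in sorted(gpus))}" if gpus else ""
        parts = [mfc_script, "run", shlex.quote(case_path), f"-n {ppn}", gpus_select]
        return " ".join(p for p in parts if p)

    # build_run_command("./mfc.sh", "case.py", 2)         -> ./mfc.sh run case.py -n 2
    # build_run_command("./mfc.sh", "case.py", 2, {0, 2}) -> ./mfc.sh run case.py -n 2 --gpus 0 2

Sorting the set here is only for a stable flag order; the harness itself joins the devices in set-iteration order.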
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 7f244e6a1..45a8905b0 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -105,8 +105,6 @@ def test():
     cons.print(f" tests/[bold magenta]UUID[/bold magenta] Summary")
     cons.print()
 
-    _handle_case.GPU_LOAD = { id: 0 for id in ARG("gpus") }
-
     # Select the correct number of threads to use to launch test CASES
     # We can't use ARG("jobs") when the --case-optimization option is set
     # because running a test case may cause it to rebuild, and thus
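With the module-level _handle_case.GPU_LOAD table removed, per-device load bookkeeping now lives entirely inside sched() and runs only when an explicit device set is supplied. A sketch of the greedy least-loaded placement rule that sched() applies per task (pick_least_loaded is a hypothetical stand-in for the inlined loop):

    from typing import Dict, Set

    def pick_least_loaded(load: Dict[int, float], ppn: int, task_load: float) -> Set[int]:
        # Same greedy rule as sched(): each of the task's ppn ranks goes to
        # whichever device currently carries the least load, and the table
        # is updated as ranks are placed.
        chosen: Set[int] = set()
        for _ in range(ppn):
            device = min(load, key=load.get)   # least-loaded device id
            load[device] += task_load / ppn    # charge this rank's share
            chosen.add(device)
        return chosen

    load = {0: 0.0, 1: 0.0}
    print(pick_least_loaded(load, ppn=2, task_load=1.0))  # {0, 1}
    print(load)                                           # {0: 0.5, 1: 0.5}

When no device set is given, sched() now initializes an empty load table and hands use_devices=None to workers, so CPU-only runs no longer implicitly claim GPU 0 (the old --gpus default).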