diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index a7193a6f0..3da807a5b 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -66,7 +66,7 @@ def add_common_arguments(p, mask = None):
             p.add_argument(f"--no-{target.name}", action="store_true", help=f"Do not build the {target.name} dependency. Use the system's instead.")
 
     if "g" not in mask:
-        p.add_argument("-g", "--gpus", nargs="+", type=int, default=[0], help="(GPU) List of GPU #s to use.")
+        p.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="(Optional GPU override) List of GPU #s to use (environment default if unspecified).")
 
 # === BUILD ===
 add_common_arguments(build, "g")
diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py
index b8c45f772..b6d2f28ec 100644
--- a/toolchain/mfc/run/engines.py
+++ b/toolchain/mfc/run/engines.py
@@ -147,14 +147,16 @@ def run(self, targets: typing.List[MFCTarget]) -> None:
 
         if not ARG("dry_run"):
             start_time = time.monotonic()
+
+            env = os.environ.copy()
+            if ARG('gpus') is not None:
+                env['CUDA_VISIBLE_DEVICES'] = ','.join([str(_) for _ in ARG('gpus')])
+
             system(
                 self.get_exec_cmd(target), cwd=self.input.case_dirpath,
-                env={
-                    **os.environ.copy(),
-                    'CUDA_VISIBLE_DEVICES': ','.join([str(_) for _ in ARG('gpus')])
-                }
+                env=env
             )
-            end_time = time.monotonic()
+            end_time = time.monotonic()
 
         cons.print(no_indent=True)
         cons.print(f"[bold green]Done[/bold green] (in {datetime.timedelta(seconds=end_time - start_time)})")
@@ -307,11 +309,11 @@ def __create_batch_file(self, system: queues.QueueSystem, targets: typing.List[M
         cons.print("> Writing batch file...")
         file_write(filepath, content)
 
-    def __execute_batch_file(self, system: queues.QueueSystem):
+    def __execute_batch_file(self, queue: queues.QueueSystem):
        # We CD to the case directory before executing the batch file so that
        # any files the queue system generates (like .err and .out) are created
        # in the correct directory.
-        cmd = system.gen_submit_cmd(self.__get_batch_filename())
+        cmd = queue.gen_submit_cmd(self.__get_batch_filename())
 
         if system(cmd, cwd=self.__get_batch_dirpath()) != 0:
-            raise MFCException(f"Submitting batch file for {system.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.")
+            raise MFCException(f"Submitting batch file for {queue.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.")
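Net effect of the args.py and engines.py hunks above: CUDA_VISIBLE_DEVICES is forced only when the user passes --gpus; otherwise the child process inherits the caller's environment untouched. A minimal sketch of that pattern, assuming only the standard library (run_with_gpu_override is a hypothetical helper, not toolchain code):

    import os
    import subprocess
    from typing import Optional, Sequence

    def run_with_gpu_override(cmd: str, gpus: Optional[Sequence[int]] = None) -> int:
        # Copy the caller's environment so an already exported
        # CUDA_VISIBLE_DEVICES survives when no --gpus override is given.
        env = os.environ.copy()
        if gpus is not None:
            # Explicit override: restrict device visibility for this child only.
            env["CUDA_VISIBLE_DEVICES"] = ",".join(str(g) for g in gpus)
        return subprocess.run(cmd, shell=True, env=env, check=False).returncode

Copying os.environ rather than mutating it keeps the override scoped to the spawned run, which matches the behavior the engines.py hunk introduces.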
diff --git a/toolchain/mfc/sched.py b/toolchain/mfc/sched.py
index 71ad990f7..f1258ddab 100644
--- a/toolchain/mfc/sched.py
+++ b/toolchain/mfc/sched.py
@@ -35,11 +35,11 @@ class Task:
     load: float
 
 
-def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Set[int]) -> None:
+def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Optional[typing.Set[int]] = None) -> None:
     nAvailable: int = nThreads
     threads: typing.List[WorkerThreadHolder] = []
 
-    sched.LOAD = { id: 0.0 for id in devices }
+    sched.LOAD = { id: 0.0 for id in devices or [] }
 
     def join_first_dead_thread(progress, complete_tracker) -> None:
         nonlocal threads, nAvailable
@@ -50,7 +50,7 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
                 raise threadHolder.thread.exc
 
             nAvailable += threadHolder.ppn
-            for device in threadHolder.devices:
+            for device in threadHolder.devices or set():
                 sched.LOAD[device] -= threadHolder.load / threadHolder.ppn
 
             progress.advance(complete_tracker)
@@ -82,18 +82,21 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
 
             # Launch Thread
             progress.advance(queue_tracker)
 
+            use_devices = None
             # Use the least loaded devices
-            devices = set()
-            for _ in range(task.ppn):
-                device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
-                sched.LOAD[device] += task.load / task.ppn
+            if devices is not None:
+                use_devices = set()
+                for _ in range(task.ppn):
+                    device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
+                    sched.LOAD[device] += task.load / task.ppn
+                    use_devices.add(device)
 
             nAvailable -= task.ppn
-            thread = WorkerThread(target=task.func, args=tuple(task.args) + (devices,))
+            thread = WorkerThread(target=task.func, args=tuple(task.args) + (use_devices,))
             thread.start()
 
-            threads.append(WorkerThreadHolder(thread, task.ppn, task.load, devices))
+            threads.append(WorkerThreadHolder(thread, task.ppn, task.load, use_devices))
 
 
     # Wait for the last tests to complete
diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py
index d83927492..6c0af864b 100644
--- a/toolchain/mfc/test/case.py
+++ b/toolchain/mfc/test/case.py
@@ -100,7 +100,11 @@ def __init__(self, trace: str, mods: dict, ppn: int = None) -> None:
         super().__init__({**BASE_CFG.copy(), **mods})
 
     def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.CompletedProcess:
-        gpu_select = f"CUDA_VISIBLE_DEVICES={','.join([str(_) for _ in gpus])}"
+        if gpus is not None and len(gpus) != 0:
+            gpus_select = f"--gpus {' '.join([str(_) for _ in gpus])}"
+        else:
+            gpus_select = ""
+
         filepath = f'"{self.get_dirpath()}/case.py"'
         tasks = f"-n {self.ppn}"
         jobs = f"-j {ARG('jobs')}" if ARG("case_optimization") else ""
@@ -110,8 +114,9 @@ def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.Co
         mfc_script = ".\mfc.bat" if os.name == 'nt' else "./mfc.sh"
 
         command: str = f'''\
-            {gpu_select} {mfc_script} run {filepath} {tasks} {binary_option} \
-            {case_optimization} {jobs} -t {' '.join(targets)} 2>&1\
+            {mfc_script} run {filepath} {tasks} {binary_option} \
+            {case_optimization} {jobs} -t {' '.join(targets)} \
+            {gpus_select} 2>&1\
         '''
 
         return subprocess.run(command, stdout=subprocess.PIPE,
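The case.py hunks above swap the old CUDA_VISIBLE_DEVICES= command prefix for the new --gpus flag, which is appended only when a non-empty device set is requested. A small sketch of that flag construction (build_run_command is hypothetical, not part of case.py):

    import shlex
    from typing import Optional, Set

    def build_run_command(mfc_script: str, case_path: str, ppn: int,
                          gpus: Optional[Set[int]] = None) -> str:
        # Append --gpus only for a non-empty device set; plain runs keep
        # inheriting whatever the caller's environment already provides.
        gpus_select = f"--gpus {' '.join(str(g) for g in sorted(gpus))}" if gpus else ""
        parts = [mfc_script, "run", shlex.quote(case_path), f"-n {ppn}", gpus_select]
        return " ".join(p for p in parts if p)

    # build_run_command("./mfc.sh", "case.py", 2)         -> ./mfc.sh run case.py -n 2
    # build_run_command("./mfc.sh", "case.py", 2, {0, 2}) -> ./mfc.sh run case.py -n 2 --gpus 0 2

Sorting the set here is only for a stable flag order; the harness itself joins the devices in set-iteration order.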
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index 7f244e6a1..45a8905b0 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -105,8 +105,6 @@ def test():
     cons.print(f" tests/[bold magenta]UUID[/bold magenta] Summary")
     cons.print()
 
-    _handle_case.GPU_LOAD = { id: 0 for id in ARG("gpus") }
-
     # Select the correct number of threads to use to launch test CASES
     # We can't use ARG("jobs") when the --case-optimization option is set
     # because running a test case may cause it to rebuild, and thus
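With the module-level _handle_case.GPU_LOAD table removed, per-device load bookkeeping now lives entirely inside sched() and runs only when an explicit device set is supplied. A sketch of the greedy least-loaded placement rule that sched() applies per task (pick_least_loaded is a hypothetical stand-in for the inlined loop):

    from typing import Dict, Set

    def pick_least_loaded(load: Dict[int, float], ppn: int, task_load: float) -> Set[int]:
        # Same greedy rule as sched(): each of the task's ppn ranks goes to
        # whichever device currently carries the least load, and the table
        # is updated as ranks are placed.
        chosen: Set[int] = set()
        for _ in range(ppn):
            device = min(load, key=load.get)   # least-loaded device id
            load[device] += task_load / ppn    # charge this rank's share
            chosen.add(device)
        return chosen

    load = {0: 0.0, 1: 0.0}
    print(pick_least_loaded(load, ppn=2, task_load=1.0))  # {0, 1}
    print(load)                                           # {0: 0.5, 1: 0.5}

When no device set is given, sched() now initializes an empty load table and hands use_devices=None to workers, so CPU-only runs no longer implicitly claim GPU 0 (the old --gpus default).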