From d665f05e83874551bb6cd0cf02faffc03db6d484 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Mon, 20 May 2024 17:36:54 -0400 Subject: [PATCH 1/7] Support Andes. --- doc/src/clusters/built-in.md | 7 +++++++ doc/src/guide/tutorial/submit.md | 1 + src/builtin.rs | 24 ++++++++++++++++++++++-- validate/validate.py | 9 +++++---- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/doc/src/clusters/built-in.md b/doc/src/clusters/built-in.md index 06a3477..fd9a860 100644 --- a/doc/src/clusters/built-in.md +++ b/doc/src/clusters/built-in.md @@ -2,6 +2,13 @@ **Row** includes built-in support for the following clusters. +## Andes (OLCF) + +**Row** automatically selects from the following partitions on [Andes]: +* `batch` + +> Note: Andes has no shared partition. All jobs must use 32 CPUs per node. + ## Anvil (Purdue) **Row** automatically selects from the following partitions on [Anvil]: diff --git a/doc/src/guide/tutorial/submit.md b/doc/src/guide/tutorial/submit.md index 515fe55..d56453f 100644 --- a/doc/src/guide/tutorial/submit.md +++ b/doc/src/guide/tutorial/submit.md @@ -7,6 +7,7 @@ This section explains how to **submit** jobs to the **scheduler** with **row**. 
## Preliminary steps **Row** has built-in support for a number of [clusters](../../clusters/built-in.md): +* Andes (OLCF) * Anvil (Purdue) * Delta (NCSA) * Great Lakes (University of Michigan) diff --git a/src/builtin.rs b/src/builtin.rs index dbebbb0..39f3cc3 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -65,6 +65,26 @@ impl BuiltIn for launcher::Configuration { } } +fn andes() -> Cluster { + //////////////////////////////////////////////////////////////////////////////////////// + // OLCF Andes + Cluster { + name: "andes".into(), + identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "andes".into()), + scheduler: SchedulerType::Slurm, + partition: vec![ + // Auto-detected partitions: batch + Partition { + name: "batch".into(), + maximum_gpus_per_job: Some(0), + require_cpus_multiple_of: Some(32), + cpus_per_node: Some(32), + ..Partition::default() + }, + ], + } +} + fn anvil() -> Cluster { //////////////////////////////////////////////////////////////////////////////////////// // Purdue Anvil @@ -250,7 +270,7 @@ fn greatlakes() -> Cluster { } } -// TODO: Add/test Frontier and Andes. +// TODO: Add/test Frontier. fn none() -> Cluster { // Fallback none cluster. 
@@ -267,7 +287,7 @@ fn none() -> Cluster { impl BuiltIn for cluster::Configuration { fn built_in() -> Self { - let cluster = vec![anvil(), delta(), greatlakes(), none()]; + let cluster = vec![andes(), anvil(), delta(), greatlakes(), none()]; cluster::Configuration { cluster } } diff --git a/validate/validate.py b/validate/validate.py index e20a38f..111a8bd 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -47,6 +47,7 @@ 'greatlakes': Cluster(cpus_per_node=36, gpus_per_node=2, gpu_arch='nvidia'), 'anvil': Cluster(cpus_per_node=128, gpus_per_node=0, gpu_arch='nvidia'), 'delta': Cluster(cpus_per_node=128, gpus_per_node=4, gpu_arch='nvidia'), + 'andes': Cluster(cpus_per_node=32, gpus_per_node=0, gpu_arch='none', no_shared=True), } N_THREADS = 4 @@ -132,7 +133,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= 1: + if cluster.cpus_per_node >= 1 and not cluster.get('no_shared', False): workflow.write( textwrap.dedent(""" [[action]] @@ -145,7 +146,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= N_THREADS: + if cluster.cpus_per_node >= N_THREADS and not cluster.get('no_shared', False): workflow.write( textwrap.dedent(f""" [[action]] @@ -159,7 +160,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= N_PROCESSES: + if cluster.cpus_per_node >= N_PROCESSES and not cluster.get('no_shared', False): workflow.write( textwrap.dedent(f""" [[action]] @@ -173,7 +174,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= N_PROCESSES * N_THREADS: + if cluster.cpus_per_node >= N_PROCESSES * N_THREADS and not cluster.get('no_shared', False): workflow.write( textwrap.dedent(f""" [[action]] From f41df59173674f98efaf3a48009886348fb81098 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Mon, 20 May 2024 18:02:59 -0400 Subject: [PATCH 2/7] Fix validate script on Andes. 
--- validate/validate.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/validate/validate.py b/validate/validate.py index 111a8bd..245198f 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -42,12 +42,12 @@ # Set the number of cpus and gpus per node in the *default* partitions that row selects. # Testing non-default partitions is beyond the scope of this script. Set to 0 to prevent # CPU and/or GPU jobs from executing. -Cluster = collections.namedtuple('Cluster', ['cpus_per_node', 'gpus_per_node', 'gpu_arch']) +Cluster = collections.namedtuple('Cluster', ('cpus_per_node', 'gpus_per_node', 'gpu_arch', 'has_shared'), defaults=(None, None, 'nvidia', True)) CLUSTERS = { 'greatlakes': Cluster(cpus_per_node=36, gpus_per_node=2, gpu_arch='nvidia'), 'anvil': Cluster(cpus_per_node=128, gpus_per_node=0, gpu_arch='nvidia'), 'delta': Cluster(cpus_per_node=128, gpus_per_node=4, gpu_arch='nvidia'), - 'andes': Cluster(cpus_per_node=32, gpus_per_node=0, gpu_arch='none', no_shared=True), + 'andes': Cluster(cpus_per_node=32, gpus_per_node=0, gpu_arch='none', has_shared=False), } N_THREADS = 4 @@ -115,7 +115,7 @@ def init(account, setup): [workspace] path = "{cluster_name}" - [submit_options.{cluster_name}] + [default.action.submit_options.{cluster_name}] """) ) @@ -133,7 +133,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= 1 and not cluster.get('no_shared', False): + if cluster.cpus_per_node >= 1 and cluster.has_shared: workflow.write( textwrap.dedent(""" [[action]] @@ -146,7 +146,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= N_THREADS and not cluster.get('no_shared', False): + if cluster.cpus_per_node >= N_THREADS and cluster.has_shared: workflow.write( textwrap.dedent(f""" [[action]] @@ -160,7 +160,7 @@ def init(account, setup): """) ) - if cluster.cpus_per_node >= N_PROCESSES and not cluster.get('no_shared', False): + if cluster.cpus_per_node >= N_PROCESSES and cluster.has_shared: 
workflow.write( textwrap.dedent(f""" [[action]] @@ -174,7 +174,7 @@ """) ) - if cluster.cpus_per_node >= N_PROCESSES * N_THREADS and not cluster.get('no_shared', False): + if cluster.cpus_per_node >= N_PROCESSES * N_THREADS and cluster.has_shared: workflow.write( textwrap.dedent(f""" [[action]] From a1da112d373f5f80172b840904474d5f3a4a46b4 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 21 May 2024 08:37:16 -0400 Subject: [PATCH 3/7] Add Frontier support. --- doc/src/clusters/built-in.md | 12 ++++++ doc/src/guide/tutorial/submit.md | 1 + src/builtin.rs | 22 ++++++++++- validate/validate.py | 66 +++++++++++++++++++++++++++++--- 4 files changed, 94 insertions(+), 7 deletions(-) diff --git a/doc/src/clusters/built-in.md b/doc/src/clusters/built-in.md index fd9a860..8ebd3e1 100644 --- a/doc/src/clusters/built-in.md +++ b/doc/src/clusters/built-in.md @@ -9,6 +9,8 @@ > Note: Andes has no shared partition. All jobs must use 32 CPUs per node. +[Andes]: https://docs.olcf.ornl.gov/systems/andes_user_guide.html + ## Anvil (Purdue) **Row** automatically selects from the following partitions on [Anvil]: @@ -40,6 +42,16 @@ allows full-node jobs and does not incur extra charges. [Delta]: https://docs.ncsa.illinois.edu/systems/delta +## Frontier (OLCF) + +**Row** automatically selects from the following partitions on [Frontier]: +* `batch` + +> Note: Frontier has no shared partition. All jobs must use 8 GPUs per node. + +[Frontier]: https://docs.olcf.ornl.gov/systems/frontier_user_guide.html# + + ## Great Lakes (University of Michigan) **Row** automatically selects from the following partitions on [Great Lakes]: diff --git a/doc/src/guide/tutorial/submit.md b/doc/src/guide/tutorial/submit.md index d56453f..12eb4a0 100644 --- a/doc/src/guide/tutorial/submit.md +++ b/doc/src/guide/tutorial/submit.md @@ -10,6 +10,7 @@ This section explains how to **submit** jobs to the **scheduler** with **row**. 
* Andes (OLCF) * Anvil (Purdue) * Delta (NCSA) +* Frontier (OLCF) * Great Lakes (University of Michigan) You can skip to the [next heading](#checking-your-job-script) if you are using one of diff --git a/src/builtin.rs b/src/builtin.rs index 39f3cc3..0c6ed30 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -199,6 +199,26 @@ fn delta() -> Cluster { } } +fn frontier() -> Cluster { + //////////////////////////////////////////////////////////////////////////////////////// + // OLCF Frontier + Cluster { + name: "frontier".into(), + identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "frontier".into()), + scheduler: SchedulerType::Slurm, + partition: vec![ + // Auto-detected partitions: batch + Partition { + name: "batch".into(), + minimum_gpus_per_job: Some(8), + require_gpus_multiple_of: Some(8), + gpus_per_node: Some(8), + ..Partition::default() + }, + ], + } +} + fn greatlakes() -> Cluster { //////////////////////////////////////////////////////////////////////////////////////// // Great Lakes @@ -287,7 +307,7 @@ fn none() -> Cluster { impl BuiltIn for cluster::Configuration { fn built_in() -> Self { - let cluster = vec![andes(), anvil(), delta(), greatlakes(), none()]; + let cluster = vec![andes(), anvil(), delta(), frontier(), greatlakes(), none()]; cluster::Configuration { cluster } } diff --git a/validate/validate.py b/validate/validate.py index 245198f..1d25070 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -42,12 +42,17 @@ # Set the number of cpus and gpus per node in the *default* partitions that row selects. # Testing non-default partitions is beyond the scope of this script. Set to 0 to prevent # CPU and/or GPU jobs from executing. 
-Cluster = collections.namedtuple('Cluster', ('cpus_per_node', 'gpus_per_node', 'gpu_arch', 'has_shared'), defaults=(None, None, 'nvidia', True)) +Cluster = collections.namedtuple( + 'Cluster', + ('cpus_per_node', 'gpus_per_node', 'gpu_arch', 'has_shared'), + defaults=(None, None, 'nvidia', True), +) CLUSTERS = { - 'greatlakes': Cluster(cpus_per_node=36, gpus_per_node=2, gpu_arch='nvidia'), + 'andes': Cluster(cpus_per_node=32, gpus_per_node=0, gpu_arch='none', has_shared=False), 'anvil': Cluster(cpus_per_node=128, gpus_per_node=0, gpu_arch='nvidia'), 'delta': Cluster(cpus_per_node=128, gpus_per_node=4, gpu_arch='nvidia'), - 'andes': Cluster(cpus_per_node=32, gpus_per_node=0, gpu_arch='none', has_shared=False), + 'frontier': Cluster(cpus_per_node=0, gpus_per_node=8, gpu_arch='amd', has_shared=False), + 'greatlakes': Cluster(cpus_per_node=36, gpus_per_node=2, gpu_arch='nvidia'), } N_THREADS = 4 @@ -94,6 +99,24 @@ def get_nvidia_gpus(): return gpus +def get_amd_gpus(): + """Get the assigned AMD GPUs.""" + result = subprocess.run( + ['rocm-smi', '--showuniqueid'], capture_output=True, check=True, text=True + ) + + gpus = [] + pattern = re.compile(r'.*\(Unique ID: (.*)$') + + # TODO: Do we need to parse ROCR_VISIBLE_DEVICES and match GPU[id] lines? 
+ for line in result.stdout.splitlines(): + match = pattern.match(line) + + gpus.append(match.group(1)) + + return gpus + + def init(account, setup): """Initialize the project.""" cluster_name = get_cluster_name() @@ -218,7 +241,7 @@ def init(account, setup): """) ) - if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'nvidia': + if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'nvidia' and cluster.has_shared: workflow.write( textwrap.dedent(""" [[action]] @@ -232,7 +255,7 @@ def init(account, setup): """) ) - if cluster.gpus_per_node >= N_GPUS and cluster.gpu_arch == 'nvidia': + if cluster.gpus_per_node >= N_GPUS and cluster.gpu_arch == 'nvidia' and cluster.has_shared: workflow.write( textwrap.dedent(f""" [[action]] @@ -246,7 +269,7 @@ def init(account, setup): """) ) - if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'nvidia': + if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'nvidia' and cluster.has_shared: workflow.write( textwrap.dedent(f""" [[action]] @@ -261,6 +284,20 @@ def init(account, setup): """) ) + if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'amd' and : + workflow.write( + textwrap.dedent(f""" + [[action]] + name = "mpi_wholenode_amd_gpus" + command = "python validate.py execute mpi_wholenode_amd_gpus {{directory}}" + products = ["mpi_wholenode_amd_gpus.out"] + launchers = ["mpi"] + [action.resources] + processes.per_submission = {cluster.gpus_per_node} + walltime.per_submission = "00:05:00" + """) + ) + def serial(directory): """Validate serial jobs.""" @@ -327,6 +364,8 @@ def check_mpi(directory, n_processes, n_threads, n_hosts, name, n_gpus=0, gpu_ar gpus = [] if n_gpus > 0 and gpu_arch == 'nvidia': gpus = comm.gather(get_nvidia_gpus(), root=0) + if n_gpus > 0 and gpu_arch == 'amd': + gpus = comm.gather(get_amd_gpus(), root=0) if comm.Get_rank() == 0: cpuset_sizes = [len(s) for s in cpusets] @@ -463,6 +502,21 @@ def nvidia_gpus(directory): """Validate multi-GPU jobs.""" check_nvidia_gpu(directory, n_gpus=N_GPUS, 
name='nvidia_gpus') +def mpi_wholenode_amd_gpus(directory): + """Check that MPI allocates processes correctly to all AMD GPUs on one node.""" + cluster_name = get_cluster_name() + cluster = CLUSTERS.get(cluster_name) + + check_mpi( + directory, + n_processes=cluster.gpus_per_node * N_NODES, + n_threads=1, + n_hosts=1, + name='mpi_wholenode_amd_gpus', + n_gpus=1, + gpu_arch='amd', + ) + if __name__ == '__main__': # Parse the command line arguments: From fb5dc20158faf727fbe28243b1b6d6b34fbb48eb Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 21 May 2024 09:24:46 -0400 Subject: [PATCH 4/7] Update frontier tests. --- validate/validate.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/validate/validate.py b/validate/validate.py index 1d25070..dadcd54 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -106,13 +106,15 @@ def get_amd_gpus(): ) gpus = [] - pattern = re.compile(r'.*\(Unique ID: (.*)$') + pattern = re.compile(r'.*Unique ID: (.*)$') # TODO: Do we need to parse ROCR_VISIBLE_DEVICES and match GPU[id] lines? for line in result.stdout.splitlines(): + print(line) match = pattern.match(line) - gpus.append(match.group(1)) + if match: + gpus.append(match.group(1)) return gpus @@ -284,7 +286,7 @@ def init(account, setup): """) ) - if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'amd' and : + if cluster.gpus_per_node >= 1 and cluster.gpu_arch == 'amd': workflow.write( textwrap.dedent(f""" [[action]] @@ -294,6 +296,7 @@ def init(account, setup): launchers = ["mpi"] [action.resources] processes.per_submission = {cluster.gpus_per_node} + gpus_per_process = 1 walltime.per_submission = "00:05:00" """) ) From 3575fe03aca9de48104260c23b79682d36e39072 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 21 May 2024 10:12:05 -0400 Subject: [PATCH 5/7] Fix frontier validation. 
--- validate/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validate/validate.py b/validate/validate.py index dadcd54..447880c 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -512,7 +512,7 @@ def mpi_wholenode_amd_gpus(directory): check_mpi( directory, - n_processes=cluster.gpus_per_node * N_NODES, + n_processes=cluster.gpus_per_node, n_threads=1, n_hosts=1, name='mpi_wholenode_amd_gpus', From c3b841958fffe9f81c4e36ea326a619befd56929 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 21 May 2024 10:13:29 -0400 Subject: [PATCH 6/7] Remove TODO. --- src/builtin.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/builtin.rs b/src/builtin.rs index 0c6ed30..5235a90 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -290,8 +290,6 @@ fn greatlakes() -> Cluster { } } -// TODO: Add/test Frontier. - fn none() -> Cluster { // Fallback none cluster. Cluster { From 3951f07496a131fc1286ac47cc6b258ea71010e6 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 21 May 2024 14:49:43 -0400 Subject: [PATCH 7/7] Run pre-commit. --- validate/validate.py | 1 + 1 file changed, 1 insertion(+) diff --git a/validate/validate.py b/validate/validate.py index 447880c..da6db40 100644 --- a/validate/validate.py +++ b/validate/validate.py @@ -505,6 +505,7 @@ def nvidia_gpus(directory): """Validate multi-GPU jobs.""" check_nvidia_gpu(directory, n_gpus=N_GPUS, name='nvidia_gpus') + def mpi_wholenode_amd_gpus(directory): """Check that MPI allocates processes correctly to all AMD GPUs on one node.""" cluster_name = get_cluster_name()