
Commit 146ebbe

Merge pull request #2544 from GoogleCloudPlatform/release-candidate

Release v1.33.0

2 parents: bec99bb + 7b7378b

198 files changed (+2045, -946 lines)

cmd/root.go

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ HPC deployments on the Google Cloud Platform.`,
 			logging.Fatal("cmd.Help function failed: %s", err)
 		}
 	},
-	Version:     "v1.32.1",
+	Version:     "v1.33.0",
 	Annotations: annotation,
 }
)
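
The release bump itself is one line in the root cobra command, so it can be sanity-checked from the rebuilt CLI. A quick sketch, assuming the default make target that produces the ghpc binary at the repo root (exact version output formatting may differ):

$ make
$ ./ghpc --version
ghpc version v1.33.0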

community/examples/AMD/README.md

Lines changed: 5 additions & 0 deletions
@@ -1,5 +1,10 @@
 # AMD solutions for the HPC Toolkit
 
+> [!NOTE]
+> This document uses Slurm-GCP v5. A newer [blueprint](./hpc-amd-slurm-v6.yaml)
+> using v6 has been published. This documentation will be updated and the v5
+> blueprint deprecated.
+
 ## AMD-Optimized Slurm Cluster
 
 This example provisions a Slurm cluster using the AMD-based Compute Optimized
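
For anyone following the note to the v6 blueprint (added in full below), a minimal deployment sketch using the standard Toolkit workflow; the project ID is a placeholder, and amd-v6 is the deployment_name set in the blueprint:

$ ./ghpc create community/examples/AMD/hpc-amd-slurm-v6.yaml \
    --vars project_id=my-gcp-project
$ ./ghpc deploy amd-v6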
community/examples/AMD/hpc-amd-slurm-v6.yaml

Lines changed: 227 additions & 0 deletions
@@ -0,0 +1,227 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+blueprint_name: hpc-amd-slurm-v6
+
+vars:
+  project_id:  ## Set GCP Project ID Here ##
+  deployment_name: amd-v6
+  region: us-east4
+  zone: us-east4-c
+
+deployment_groups:
+- group: primary
+  modules:
+  - id: network
+    source: modules/network/vpc
+
+  - id: homefs
+    source: modules/file-system/filestore
+    use: [network]
+    settings:
+      local_mount: /home
+
+  - id: swfs
+    source: modules/file-system/filestore
+    use: [network]
+    settings:
+      local_mount: /sw
+
+  - id: spack-setup
+    source: community/modules/scripts/spack-setup
+    settings:
+      install_dir: /sw/spack
+
+  - id: spack-execute
+    source: community/modules/scripts/spack-execute
+    use: [spack-setup]
+    settings:
+      log_file: /var/log/spack.log
+      data_files:
+      - destination: /tmp/projections-config.yaml
+        content: |
+          modules:
+            default:
+              tcl:
+                hash_length: 0
+                all:
+                  conflict:
+                  - '{name}'
+                projections:
+                  all: '{name}/{version}-{compiler.name}-{compiler.version}'
+      - destination: /tmp/slurm-external-config.yaml
+        content: |
+          packages:
+            slurm:
+              externals:
+              - spec: slurm@23-11-3
+                prefix: /usr/local
+              buildable: False
+      - destination: /sw/spack/openfoam_env.yaml
+        content: |
+          spack:
+            definitions:
+            - compilers:
+              - gcc@13.1.0
+            - mpis:
+              - openmpi@4.1.3+legacylaunchers+pmi fabrics=none schedulers=slurm
+            - packages:
+              - flex@2.6.4
+            - mpi_packages:
+              - openfoam-org@8 ^flex@2.6.4 target=zen3
+            specs:
+            - matrix:
+              - - $mpis
+              - - $%compilers
+            - matrix:
+              - - $packages
+              - - $%compilers
+            - matrix:
+              - - $mpi_packages
+              - - $%compilers
+              - - $^mpis
+            concretizer:
+              unify: when_possible
+      commands: |
+        spack config --scope defaults add config:build_stage:/opt/spack_build_stage
+        spack config --scope defaults add -f /tmp/projections-config.yaml
+        spack config --scope site add -f /tmp/slurm-external-config.yaml
+        spack config --scope site add concretizer:targets:host_compatible:false
+
+        # gcc 12.1.0 is known to have runtime failures with OpenFOAM 8
+        spack install gcc@13.1.0 %gcc@8.5.0 target=x86_64
+        spack load gcc@13.1.0 %gcc@8.5.0 target=x86_64
+        spack compiler find --scope site
+
+        if ! spack env list | grep -q openfoam; then
+          spack env create openfoam /sw/spack/openfoam_env.yaml
+          spack env activate openfoam
+          spack concretize
+          spack install
+        fi
+
+  - id: spack-startup
+    source: modules/scripts/startup-script
+    settings:
+      runners:
+      - $(spack-execute.spack_runner)
+      - type: shell
+        destination: shutdown.sh
+        content: |
+          #!/bin/bash
+          if [ ! -f /etc/block_auto_shutdown ]; then
+            touch /etc/block_auto_shutdown
+            shutdown -h +1
+          fi
+
+  - id: slurm_startup
+    source: modules/scripts/startup-script
+    settings:
+      runners:
+      - $(spack-setup.spack_runner)
+      # the following installation of AOCC may be automated in the future
+      # with a clear direction to the user to read the EULA at
+      # https://developer.amd.com/aocc-compiler-eula/
+      - type: data
+        destination: /var/tmp/install_aocc.sh
+        content: |
+          #!/bin/bash
+          source /sw/spack/share/spack/setup-env.sh
+          spack install aocc@3.2.0 +license-agreed
+          spack load aocc@3.2.0
+          spack compiler find --scope site
+          spack -d install -v openmpi@4.1.3 %aocc@3.2.0 +legacylaunchers +pmi schedulers=slurm
+      - type: data
+        destination: /var/tmp/openfoam_test.sh
+        content: |
+          #!/bin/bash
+          # the following line works around a problem activating environments
+          # before directory is accessed
+          ls -lha /sw/spack/var/spack/environments/openfoam/ &>/dev/null
+          spack env activate openfoam
+          DIR=$HOME/openfoam_test
+          mkdir -p $DIR
+          cd $DIR
+          cp -fr $WM_PROJECT_DIR/tutorials/incompressible/simpleFoam/motorBike .
+          cd motorBike
+          ./Allrun
+
+  - id: spack_builder
+    source: modules/compute/vm-instance
+    use: [network, swfs, spack-startup]
+    settings:
+      name_prefix: spack-builder
+      machine_type: c2d-standard-16
+      instance_image:
+        # these images must match the images used by Slurm modules below because
+        # we are building OpenMPI with PMI support in libraries contained in
+        # Slurm installation
+        family: slurm-gcp-6-4-hpc-rocky-linux-8
+        project: schedmd-slurm-public
+
+  - id: low_cost_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network]
+    settings:
+      machine_type: c2d-standard-4
+      node_count_dynamic_max: 10
+      bandwidth_tier: gvnic_enabled
+      enable_placement: false
+
+  - id: low_cost_partition
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [low_cost_nodeset]
+    settings:
+      partition_name: lowcost
+
+  - id: compute_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network]
+    settings:
+      machine_type: c2d-standard-112
+      node_count_dynamic_max: 50
+      bandwidth_tier: gvnic_enabled
+      enable_placement: true
+
+  # Because is_default is set to true, jobs will run on this partition unless an
+  # alternative partition is specified using, for example, "srun -p lowcost"
+  - id: compute_partition
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [compute_nodeset]
+    settings:
+      partition_name: compute
+      is_default: true
+
+  - id: slurm_login
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
+    use: [network]
+    settings:
+      # need at least 8 physical cores to run OpenFOAM test
+      machine_type: c2d-standard-16
+      name_prefix: login
+
+  - id: slurm_controller
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
+    use:
+    - network
+    - homefs
+    - swfs
+    - low_cost_partition
+    - compute_partition
+    - slurm_login
+    settings:
+      machine_type: c2d-standard-4
+      login_startup_script: $(slurm_startup.startup_script)
+      login_startup_scripts_timeout: 21600
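
Once deployed, the pieces above fit together from the login node: compute is the default partition (per the is_default comment in the blueprint), the slurm_startup runners stage the AOCC and OpenFOAM test scripts under /var/tmp on the login node, and /sw is mounted cluster-wide. A hedged usage sketch, not part of the commit itself:

$ sinfo                                             # expect lowcost and compute* (default)
$ bash /var/tmp/install_aocc.sh                     # read the AOCC EULA before running
$ sbatch -N1 /var/tmp/openfoam_test.sh              # lands on the default compute partition
$ sbatch -N1 -p lowcost /var/tmp/openfoam_test.sh   # explicit partition selection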

community/examples/hpc-build-slurm-image.yaml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ vars:
   image_build_machine_type: n2d-standard-16
   build_from_image_family: hpc-rocky-linux-8
   build_from_image_project: cloud-hpc-image-public
-  build_from_git_ref: 6.4.3
+  build_from_git_ref: 6.4.4
   built_image_family: my-custom-slurm
   built_instance_image:
     family: $(vars.built_image_family)
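
build_from_git_ref pins the slurm-gcp checkout baked into the custom image, so this bump moves new builds from the 6.4.3 to the 6.4.4 scripts. Like any blueprint variable it should also be overridable at create time without editing the file; a sketch (project ID is a placeholder):

$ ./ghpc create community/examples/hpc-build-slurm-image.yaml \
    --vars project_id=my-gcp-project \
    --vars build_from_git_ref=6.4.4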

community/examples/hpc-slurm-gromacs.yaml

Lines changed: 2 additions & 2 deletions
@@ -110,7 +110,7 @@ deployment_groups:
     use: [network1]
     settings:
       name_prefix: login
-      disable_login_public_ips: false
+      enable_login_public_ips: true
 
   - id: slurm_controller
     source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
@@ -121,6 +121,6 @@ deployment_groups:
     - homefs
     - appsfs
     settings:
-      disable_controller_public_ips: false
+      enable_controller_public_ips: true
       login_startup_script: $(script.startup_script)
       login_startup_scripts_timeout: 21600
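
Note the pattern in this file and in hpc-slurm-local-ssd-v6.yaml below: the double-negative disable_*_public_ips: false settings become enable_*_public_ips: true in the Slurm-GCP v6 modules. User blueprints carrying the old spellings need the same rename; a one-shot sketch for the false-to-true case (the blueprints/ directory is a placeholder; review the resulting diff before committing):

$ grep -rl 'disable_\(login\|controller\)_public_ips' blueprints/ \
    | xargs sed -i \
        -e 's/disable_login_public_ips: false/enable_login_public_ips: true/' \
        -e 's/disable_controller_public_ips: false/enable_controller_public_ips: true/'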

community/examples/hpc-slurm-local-ssd-v6.yaml

Lines changed: 2 additions & 2 deletions
@@ -103,7 +103,7 @@ deployment_groups:
     settings:
       name_prefix: login
       machine_type: n1-standard-4
-      disable_login_public_ips: false
+      enable_login_public_ips: true
 
   - id: slurm_controller
     source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
@@ -116,4 +116,4 @@ deployment_groups:
       suspend_timeout: 300
       no_comma_params: false
       machine_type: n1-standard-4
-      disable_controller_public_ips: false
+      enable_controller_public_ips: true

community/examples/hpc-slurm-ramble-gromacs.yaml renamed to community/examples/hpc-slurm-ramble-gromacs-v6.yaml

Lines changed: 26 additions & 24 deletions
@@ -1,4 +1,4 @@
-# Copyright 2022 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -32,7 +32,7 @@ deployment_groups:
   # Source is an embedded module, denoted by "modules/*" without ./, ../, /
   # as a prefix. To refer to a local module, prefix with ./, ../ or /
   # Example - ./modules/network/vpc
-  - id: network1
+  - id: network
     source: modules/network/vpc
 
   ## Install Scripts
@@ -68,6 +68,7 @@ deployment_groups:
       commands: |
         ramble workspace create gromacs -c /opt/apps/gromacs_scaling.yaml -t /opt/apps/execute_experiment.tpl
         ramble workspace activate gromacs
+        ramble workspace concretize -f
        ramble workspace info -v
 
   - id: login-startup
@@ -81,7 +82,7 @@ deployment_groups:
         ramble:
           variables:
             mpi_command: 'mpirun -n {n_ranks} -ppn {processes_per_node}'
-            batch_submit: 'sbatch {execute_experiment}'
+            batch_submit: '{execute_experiment}'
             processes_per_node: 30
           applications:
             gromacs:
@@ -92,7 +93,6 @@ deployment_groups:
               variables:
                 n_nodes: [1, 2, 4, 8, 16]
         spack:
-          concretized: false
           packages: {}
           environments: {}
   - type: data
@@ -109,37 +109,39 @@ deployment_groups:
     source: modules/scripts/startup-script
     settings:
       runners:
+      - $(spack-install.spack_runner)
       - $(ramble-setup.ramble_runner)
 
-  - id: compute_node_group
-    source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
+  - id: compute_nodeset
+    source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
+    use: [network]
     settings:
       node_count_dynamic_max: 20
       bandwidth_tier: gvnic_enabled
 
   - id: compute_partition
-    source: community/modules/compute/schedmd-slurm-gcp-v5-partition
-    use:
-    - network1
-    - compute_node_group
+    source: community/modules/compute/schedmd-slurm-gcp-v6-partition
+    use: [compute_nodeset]
     settings:
       partition_name: compute
 
-  - id: slurm_controller
-    source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
-    use:
-    - network1
-    - compute_partition
-    - access-startup
+  - id: slurm_login
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
+    use: [network]
     settings:
-      disable_controller_public_ips: false
+      name_prefix: login
+      machine_type: n2-standard-4
+      enable_login_public_ips: true
 
-  - id: slurm_login
-    source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
+  - id: slurm_controller
+    source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
     use:
-    - network1
-    - slurm_controller
-    - login-startup
+    - network
+    - compute_partition
+    - slurm_login
     settings:
-      machine_type: n2-standard-4
-      disable_login_public_ips: false
+      enable_controller_public_ips: true
+      login_startup_script: $(login-startup.startup_script)
+      controller_startup_script: $(access-startup.startup_script)
+      controller_startup_scripts_timeout: 1000
+      login_startup_scripts_timeout: 1000
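
The v6 rewrite of this example also front-loads Ramble's concretization (ramble workspace concretize -f) into the access-startup runner and switches batch_submit to the generated {execute_experiment} script. A sketch of driving the scaling study from the login node afterwards; the workspace name gromacs comes from the runner above, and ramble on is Ramble's standard command for executing an activated workspace:

$ ramble workspace activate gromacs
$ ramble workspace info -v   # inspect the concretized experiments (n_nodes: 1, 2, 4, 8, 16)
$ ramble on                  # set up and launch the GROMACS experiments via Slurm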
