diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_flexchem.xml b/rose-stem/app/lfric_atm/file/file_def_diags_flexchem.xml index a59d27821..7ffa3a145 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_flexchem.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_flexchem.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim.xml b/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim.xml index 495721086..c84243062 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim_chem.xml b/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim_chem.xml index c1d628a90..45ef77ea2 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim_chem.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_gal_clim_chem.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_gal_nwp.xml b/rose-stem/app/lfric_atm/file/file_def_diags_gal_nwp.xml index 1ba9df43e..6b88af04e 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_gal_nwp.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_gal_nwp.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_idealised.xml b/rose-stem/app/lfric_atm/file/file_def_diags_idealised.xml index 7887a0d2a..76e92d218 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_idealised.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_idealised.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_idealised1.xml b/rose-stem/app/lfric_atm/file/file_def_diags_idealised1.xml index c0d16815f..089f2a300 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_idealised1.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_idealised1.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_ls_and_jedi.xml b/rose-stem/app/lfric_atm/file/file_def_diags_ls_and_jedi.xml index e4bbc8b2d..e45ce213d 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_ls_and_jedi.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_ls_and_jedi.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_name.xml b/rose-stem/app/lfric_atm/file/file_def_diags_name.xml index 1c25a9f43..fbe16d0e1 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_name.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_name.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_oper_nwp_gl.xml b/rose-stem/app/lfric_atm/file/file_def_diags_oper_nwp_gl.xml index 661368f12..cfbe24d38 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_oper_nwp_gl.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_oper_nwp_gl.xml @@ -1,7 +1,14 @@ + + + + + + + - + @@ -132,7 +139,7 @@ - + @@ -194,7 +201,7 @@ - + @@ -231,7 +238,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_ral.xml b/rose-stem/app/lfric_atm/file/file_def_diags_ral.xml index 34951193f..baf6aa3ca 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_ral.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_ral.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_ral_ls_and_jedi.xml b/rose-stem/app/lfric_atm/file/file_def_diags_ral_ls_and_jedi.xml index affa03622..28457a494 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_ral_ls_and_jedi.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_ral_ls_and_jedi.xml @@ -1,7 +1,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_var_ops.xml b/rose-stem/app/lfric_atm/file/file_def_diags_var_ops.xml index fc0cfc9c7..4d49c665b 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_var_ops.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_var_ops.xml @@ -2,7 +2,7 @@ - + @@ -29,7 +29,7 @@ - + @@ -45,7 +45,7 @@ - + @@ -89,7 +89,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/file_def_diags_ver.xml b/rose-stem/app/lfric_atm/file/file_def_diags_ver.xml index 206cd0278..5e28ad32e 100644 --- a/rose-stem/app/lfric_atm/file/file_def_diags_ver.xml +++ b/rose-stem/app/lfric_atm/file/file_def_diags_ver.xml @@ -1,7 +1,7 @@ - + @@ -35,7 +35,7 @@ - + diff --git a/rose-stem/app/lfric_atm/file/iodef_gal_nwp_hres.xml b/rose-stem/app/lfric_atm/file/iodef_gal_nwp_hres.xml index 1cb914b2b..df5e2ad13 100644 --- a/rose-stem/app/lfric_atm/file/iodef_gal_nwp_hres.xml +++ b/rose-stem/app/lfric_atm/file/iodef_gal_nwp_hres.xml @@ -98,6 +98,9 @@ performance 1.0 + + + 10485760 diff --git a/rose-stem/app/lfric_atm/file/iodef_gal_nwp_oper_hres.xml b/rose-stem/app/lfric_atm/file/iodef_gal_nwp_oper_hres.xml new file mode 100644 index 000000000..881c26f5d --- /dev/null +++ b/rose-stem/app/lfric_atm/file/iodef_gal_nwp_oper_hres.xml @@ -0,0 +1,114 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + performance + + + 10485760 + + + + true + 1 + false + + + + + diff --git a/rose-stem/app/lfric_atm/opt/rose-app-mg3.conf b/rose-stem/app/lfric_atm/opt/rose-app-mg3.conf new file mode 100644 index 000000000..ce1c2df6e --- /dev/null +++ b/rose-stem/app/lfric_atm/opt/rose-app-mg3.conf @@ -0,0 +1,3 @@ +[namelist:multigrid] +chain_mesh_tags='dynamics','multigrid_l1','multigrid_l2' +multigrid_chain_nitems=3 diff --git a/rose-stem/app/lfric_atm/opt/rose-app-oper_diags_hres.conf b/rose-stem/app/lfric_atm/opt/rose-app-oper_diags_hres.conf new file mode 100644 index 000000000..f999b8f91 --- /dev/null +++ b/rose-stem/app/lfric_atm/opt/rose-app-oper_diags_hres.conf @@ -0,0 +1,2 @@ +[file:iodef.xml] +source=$ROSE_SUITE_DIR/app/lfric_atm/file/iodef_gal_nwp_oper_hres.xml diff --git a/rose-stem/app/memory_profile/bin/plot_memory_ex1a.py b/rose-stem/app/memory_profile/bin/plot_memory_ex1a.py index c3cf079ef..70a95c91a 100755 --- a/rose-stem/app/memory_profile/bin/plot_memory_ex1a.py +++ b/rose-stem/app/memory_profile/bin/plot_memory_ex1a.py @@ -84,6 +84,11 @@ def plot_run_job(run, out_filename): if matcher: mpiprocs = int(matcher.group(2)) + if not os.path.exists(os.path.join(run, 'job.out')): + time.sleep(5) + if not os.path.exists(os.path.join(run, 'job.out')): + raise FileNotFoundError(f'{run}/job.out does not exist') + with open(os.path.join(run, 'job.out'), encoding="utf-8") as jof: jofr = jof.read() # re pattern to find max memory usage report by node diff --git a/rose-stem/app/plot/bin/baroclinic.py b/rose-stem/app/plot/bin/baroclinic.py index 305454901..56d05a32e 100755 --- a/rose-stem/app/plot/bin/baroclinic.py +++ b/rose-stem/app/plot/bin/baroclinic.py @@ -88,7 +88,7 @@ def make_figures(filein, plotpath, fields, vertical_spacing, formulation): else: combined_fields = [field] - interp_fig = plt.figure(figsize=(20, 10)) + fig1, ax = plt.subplots(figsize=(20, 10)) for cfield in combined_fields: cube = read_ugrid_data(filein, cfield) @@ -166,63 +166,58 @@ def make_figures(filein, plotpath, fields, vertical_spacing, formulation): kappa = rd/1005.0 plot_data[:, :, level] = 0.01*fi**(1.0/kappa) * p0 - nplots = 1 - nxplots = 1 - nyplots = 1 - - for iplot in range(nplots): - ax = interp_fig.add_subplot(nxplots, nyplots, iplot+1) - level = iplot - if (cfield == 'm_v' or cfield == 'm_cl'): - # Plot level 10 for mositure fields - level = 10 - cmap = magma.reversed() - else: - cmap = magma - ys = np.tile(yi, (n_levs, 1)) + if (cfield == 'm_v' or cfield == 'm_cl'): + # Plot level 10 for mositure fields + level = 10 + cmap = magma.reversed() + else: + level = 0 + cmap = magma + ys = np.tile(yi, (n_levs, 1)) - if direction == 'xz': - lon, height = np.meshgrid(xi, zi) - CS = plt.contourf(lon, height, - plot_data[:, plot_lat, :].T, - levels=levels, cmap=cmap) - plt.colorbar(cmap=cmap) - CL = plt.contour(lat, height, - plot_data[:, plot_lat, :].T, - levels=levels, linewidths=0.5, - colors='k') - plt.title(['lat = ', yi[plot_lat]*360./np.real(nx)]) - if direction == 'yz': - lat, height = np.meshgrid(yi, zi) - CS = plt.contourf(lat, height, - plot_data[:, plot_long, :].T, - levels=levels, cmap=cmap) - plt.colorbar(cmap=cmap) - CL = plt.contour(lat, height, - plot_data[:, plot_long, :].T, - levels=levels, linewidths=0.5, - colors='k') - plt.title(['long = ', xi[plot_long]*360./np.real(nx)]) - if direction == 'xy': - lat, lon = np.meshgrid(yi, xi) - if cfield == 'exner' and iplot == 0: - # Extrapolate data to the surface - dz = plot_data[:, :, 0] + (zi_f[0] - zi_h[0]) * \ + if direction == 'xz': + lon, height = np.meshgrid(xi, zi) + CS = ax.contourf(lon, height, + plot_data[:, plot_lat, :].T, + levels=levels, cmap=cmap) + fig1.colorbar(CS, cmap=cmap) + CL = ax.contour(lat, height, + plot_data[:, plot_lat, :].T, + levels=levels, linewidths=0.5, + colors='k') + ax.set_title(['lat = ', yi[plot_lat]*360./np.real(nx)]) + if direction == 'yz': + lat, height = np.meshgrid(yi, zi) + CS = ax.contourf(lat, height, + plot_data[:, plot_long, :].T, + levels=levels, cmap=cmap) + fig1.colorbar(CS, cmap=cmap) + CL = ax.contour(lat, height, + plot_data[:, plot_long, :].T, + levels=levels, linewidths=0.5, + colors='k') + ax.set_title(['long = ', xi[plot_long]*360./np.real(nx)]) + if direction == 'xy': + lat, lon = np.meshgrid(yi, xi) + if cfield == 'exner': + # Extrapolate data to the surface + dz = plot_data[:, :, 0] + (zi_f[0] - zi_h[0]) * \ (plot_data[:, :, 0] - plot_data[:, :, level]) \ / (zi_h[0] - zi_h[1]) - else: - dz = plot_data[:, :, level] - if cfield != 'exner': - CS = plt.contourf(lon, lat, - plot_data[:, :, level].T, - levels=levels, cmap=cmap) - plt.colorbar(cmap=cmap) - if cfield != 'theta': - CL = plt.contour(lon, lat, dz.T, levels=levels, - linewidths=1.0, colors='k') - plt.clabel(CL, CL.levels[1::2], fontsize=15, - inline=1, fmt='%3.1f') + else: + dz = plot_data[:, :, level] + + if cfield != 'exner': + CS = ax.contourf(lon, lat, + plot_data[:, :, level].T, + levels=levels, cmap=cmap) + fig1.colorbar(CS, cmap=cmap) + if cfield != 'theta': + CL = ax.contour(lon, lat, dz.T, levels=levels, + linewidths=1.0, colors='k') + ax.clabel(CL, CL.levels[1::2], fontsize=15, + inline=1, fmt='%3.1f') pngfile = '%s/baroclinic_plot-%s-time%s-%s.png' % \ (plotpath, cfield, time[t], direction) diff --git a/rose-stem/app/test_launch-exe/bin/test_launch_exe_configuration_meto.py b/rose-stem/app/test_launch-exe/bin/test_launch_exe_configuration_meto.py index d77f3275f..87a7ab265 100755 --- a/rose-stem/app/test_launch-exe/bin/test_launch_exe_configuration_meto.py +++ b/rose-stem/app/test_launch-exe/bin/test_launch_exe_configuration_meto.py @@ -637,7 +637,7 @@ def test_run_lfric_atm_nwp_gal9_C224_MG_ex1a_cce_fast_debug_64bit_rbl32(self, mo monkeypatch.setenv("BIN_DIR", "$OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32") monkeypatch.setenv("EXEC_NAME", "lfric_atm") monkeypatch.setenv("PAT_EXE_EXTEN", "") - monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --cpu-bind=depth --np 1176 --depth 1 --ppn 128 $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --ppn 4 xios_server.exe") + monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --cpu-bind=depth --np 1176 --depth 1 --ppn 128 $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --depth 32 --ppn 4 xios_server.exe") sr = subprocess.run(self.launch_exe, capture_output=True) assert sr.returncode == 0, sr.stderr.decode("UTF-8") @@ -659,7 +659,7 @@ def test_run_mem_profile_lfric_atm_nwp_gal9_C224_MG_ex1a_cce_fast_debug_64bit_rb monkeypatch.setenv("EXEC_NAME", "lfric_atm") monkeypatch.setenv("PAT_EXE_EXTEN", "") monkeypatch.setenv("MEMORY_PROFILE", "True") - monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --line-buffer --label --cpu-bind=depth --np 1176 --depth 1 --ppn 128 /usr/bin/time -f max_mem_lfric_atm_%Mkb $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --ppn 4 /usr/bin/time -f max_mem_xios_server_%Mkb xios_server.exe") + monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --line-buffer --label --cpu-bind=depth --np 1176 --depth 1 --ppn 128 /usr/bin/time -f max_mem_lfric_atm_%Mkb $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --depth 32 --ppn 4 /usr/bin/time -f max_mem_xios_server_%Mkb xios_server.exe") sr = subprocess.run(self.launch_exe, capture_output=True) assert sr.returncode == 0, sr.stderr.decode("UTF-8") @@ -681,7 +681,28 @@ def test_run_mem_profile_false_lfric_atm_nwp_gal9_C224_MG_ex1a_cce_fast_debug_64 monkeypatch.setenv("EXEC_NAME", "lfric_atm") monkeypatch.setenv("PAT_EXE_EXTEN", "") monkeypatch.setenv("MEMORY_PROFILE", "False") - monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --cpu-bind=depth --np 1176 --depth 1 --ppn 128 $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --ppn 4 xios_server.exe") + monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --cpu-bind=depth --np 1176 --depth 1 --ppn 128 $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 16 --depth 32 --ppn 4 xios_server.exe") + sr = subprocess.run(self.launch_exe, + capture_output=True) + assert sr.returncode == 0, sr.stderr.decode("UTF-8") + + def test_run_lfric_atm_nwp_gal9_C896_MG_ex1a_cce_production_32bit(self, monkeypatch): + monkeypatch.setenv("TARGET_PLATFORM", "meto-ex1a") + monkeypatch.setenv("RUN_METHOD", "mpiexec") + monkeypatch.setenv("HYPERTHREADS", "1") + monkeypatch.setenv("CORES_PER_NODE", "128") + monkeypatch.setenv("NUMA_REGIONS_PER_NODE", "2") + monkeypatch.setenv("OMP_NUM_THREADS", "1") + monkeypatch.setenv("TOTAL_RANKS", "4704") + monkeypatch.setenv("XIOS_SERVER_MODE", "True") + monkeypatch.setenv("XIOS_SERVER_RANKS", "128") + monkeypatch.setenv("xios_nodes", "4") + monkeypatch.setenv("mpi_parts_xios", "128") + monkeypatch.setenv("CORES_PER_NODE_OVERRIDE", "0") + monkeypatch.setenv("BIN_DIR", "$OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32") + monkeypatch.setenv("EXEC_NAME", "lfric_atm") + monkeypatch.setenv("PAT_EXE_EXTEN", "") + monkeypatch.setenv("TEST_LAUNCH_EXE_EXEC", "mpiexec --cpu-bind=depth --np 4704 --depth 1 --ppn 128 $OUTPUT_ROOT/bin/lfric_atm/cce_fast-debug-64bit-rbl32/lfric_atm configuration.nml : --cpu-bind=depth --np 128 --depth 4 --ppn 32 xios_server.exe") sr = subprocess.run(self.launch_exe, capture_output=True) assert sr.returncode == 0, sr.stderr.decode("UTF-8") diff --git a/rose-stem/bin/application_results_setup.sh b/rose-stem/bin/application_results_setup.sh index 7363fd63d..4ed4e8a15 100755 --- a/rose-stem/bin/application_results_setup.sh +++ b/rose-stem/bin/application_results_setup.sh @@ -8,18 +8,16 @@ # Prepare results location mkdir -p $TASK_OUTPUT_DIR/results/ +# Symbolic link to results directory, so all files can write to /work/results/ +ln -sf $TASK_OUTPUT_DIR/results $CYLC_TASK_WORK_DIR/results +# Specific results directory symbolic link exceptions to support +# `lfric_diag.nc`, `lfric_averages.nc` & `lfric_initial` across many tests +# In general output files should target `name="results/fname"` in XIOS xml +ln -sf $TASK_OUTPUT_DIR/results/lfric_diag.nc $CYLC_TASK_WORK_DIR/lfric_diag.nc +ln -sf $TASK_OUTPUT_DIR/results/lfric_averages.nc $CYLC_TASK_WORK_DIR/lfric_averages.nc +ln -sf $TASK_OUTPUT_DIR/results/lfric_initial.nc $CYLC_TASK_WORK_DIR/lfric_initial.nc if [ $LUSTRE_FILESYSTEM ]; then # Set Lustre striping to maximum for results (performance) - lfs setstripe -c -1 $TASK_OUTPUT_DIR/results/ + lfs setstripe -c 32 -S 4m -p flash $TASK_OUTPUT_DIR/results/ + lfs setstripe -c 32 -S 4m -p flash $CYLC_SUITE_SHARE_DIR/data/ fi - -# Symbolic link for each potential output file, -# from `work` to `results` -# avoiding cp copy commands, as these are very -# storage & wall clock intensive -ln -sf $TASK_OUTPUT_DIR/results/lfric_diagnostics.nc $CYLC_TASK_WORK_DIR/lfric_diagnostics.nc -ln -sf $TASK_OUTPUT_DIR/results/lfric_diag.nc $CYLC_TASK_WORK_DIR/lfric_diag.nc -ln -sf $TASK_OUTPUT_DIR/results/lfric_ver.nc $CYLC_TASK_WORK_DIR/lfric_ver.nc -ln -sf $TASK_OUTPUT_DIR/results/lfric_ver_tp0.nc $CYLC_TASK_WORK_DIR/lfric_ver_tp0.nc -ln -sf $TASK_OUTPUT_DIR/results/lfric_initial.nc $CYLC_TASK_WORK_DIR/lfric_initial.nc -ln -sf $TASK_OUTPUT_DIR/results/lfric_averages.nc $CYLC_TASK_WORK_DIR/lfric_averages.nc diff --git a/rose-stem/site/meto/common/bin/launch-exe b/rose-stem/site/meto/common/bin/launch-exe index 192900191..18abe84af 100755 --- a/rose-stem/site/meto/common/bin/launch-exe +++ b/rose-stem/site/meto/common/bin/launch-exe @@ -149,8 +149,28 @@ if [ "${RUN_METHOD}" = "mpiexec" ] ; then if [ "${xios_nodes}" = "0" ] ; then XIOS_OPTS=" : --cpu-bind=depth --np ${XIOS_SERVER_RANKS} " else - XIOS_CORES_PER_NODE="$(( mpi_parts_xios / xios_nodes ))" - XIOS_OPTS=" : --cpu-bind=depth --np ${XIOS_SERVER_RANKS} --ppn ${XIOS_CORES_PER_NODE} " + XIOS_CORES_PER_NODE="$(( mpi_parts_xios / xios_nodes ))" + + XIOS_FRACTIONAL_DEPTH="$(( CORES_PER_NODE / XIOS_CORES_PER_NODE ))" + if (( "${XIOS_FRACTIONAL_DEPTH}" >= 128 )) ; then + XIOS_DEPTH=" --depth 128" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 64 )) ; then + XIOS_DEPTH=" --depth 64" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 32 )) ; then + XIOS_DEPTH=" --depth 32" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 16 )) ; then + XIOS_DEPTH=" --depth 16" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 8 )) ; then + XIOS_DEPTH=" --depth 8" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 4 )) ; then + XIOS_DEPTH=" --depth 4" + elif (( "${XIOS_FRACTIONAL_DEPTH}" >= 2 )) ; then + XIOS_DEPTH=" --depth 2" + else + XIOS_DEPTH="" + fi + + XIOS_OPTS=" : --cpu-bind=depth --np ${XIOS_SERVER_RANKS}${XIOS_DEPTH} --ppn ${XIOS_CORES_PER_NODE} " fi fi diff --git a/rose-stem/site/meto/common/suite_config_ex1a.cylc b/rose-stem/site/meto/common/suite_config_ex1a.cylc index 604eff7ad..15841b09d 100644 --- a/rose-stem/site/meto/common/suite_config_ex1a.cylc +++ b/rose-stem/site/meto/common/suite_config_ex1a.cylc @@ -10,11 +10,13 @@ {% set ex1a_compiler_gnu = 'module switch cpe/22.11 cpe/23.05 ; ' ~ 'module load PrgEnv-gnu ; '~ 'module load gcc/12.2.0 ; '~ + 'module load cray-mpich/8.1.27 ; '~ 'module load lfric-gnu/12.2.0/3.0+ || true' %} {% set ex1a_compiler_cce = 'module switch PrgEnv-cray PrgEnv-cray/8.4.0 ; ' ~ 'module load cpe/23.05 ; '~ 'module switch cce cce/15.0.0 ; '~ + 'module load cray-mpich/8.1.27 ; '~ 'module load lfric-cray/15.0.0/3.0+ || true ' %} {% set ex1a_coupled_cce = 'module unload xios/2701-h57fm7d || true ; ' ~ @@ -40,7 +42,7 @@ [[[environment]]] BUILD_ROOT = ${TMPDIR} HYPERTHREADS = 1 - CORES_PER_NODE = 128 + CORES_PER_NODE = {{site_vars.node_cores}} NUMA_REGIONS_PER_NODE = 2 LUSTRE_FILESYSTEM = true ROSE_LAUNCHER = 'mpiexec' diff --git a/rose-stem/site/meto/lfric_atm/tasks_lfric_atm_ex1a.cylc b/rose-stem/site/meto/lfric_atm/tasks_lfric_atm_ex1a.cylc index 4054439c6..776d176ed 100644 --- a/rose-stem/site/meto/lfric_atm/tasks_lfric_atm_ex1a.cylc +++ b/rose-stem/site/meto/lfric_atm/tasks_lfric_atm_ex1a.cylc @@ -76,6 +76,14 @@ "memory": [24, "GB"], }) %} +{% elif task_ns.conf_name == "nwp_gal9-C896_MG" %} + + {% if "-64bit" in task_ns.extras %} + {% do task_dict.update({ + "task_ranks_per_node": 112, + }) %} + {% endif %} + {% endif %} {% if "perftools" in task_ns.compiler %} diff --git a/rose-stem/site/meto/macros/macros_ex1a.cylc b/rose-stem/site/meto/macros/macros_ex1a.cylc index 70a1229e8..d7c8dc02d 100644 --- a/rose-stem/site/meto/macros/macros_ex1a.cylc +++ b/rose-stem/site/meto/macros/macros_ex1a.cylc @@ -5,7 +5,11 @@ {# ########################################################################### #} {% do LOG.debug("Entered site/meto/macros/macros_ex1a.cylc") %} -{% set ex1a_cores_per_node = 128 %} +{% if site_vars.node_type == "genoa" %} + {% set ex1a_cores_per_node = 192 %} +{% else %} + {% set ex1a_cores_per_node = 128 %} +{% endif %} {% set ex1a_socket_per_node = 2 %} {% macro normal_queue(mpi_ranks, @@ -74,7 +78,7 @@ -l select={{ lfric_nodes + ocean_nodes|int + river_nodes|int + - xios_nodes|int }} + xios_nodes|int }}:coretype={{site_vars.node_type}}:mem={{site_vars.node_mem}}GB {% endmacro %} diff --git a/rose-stem/site/meto/variables.cylc b/rose-stem/site/meto/variables.cylc index 41a935e6f..a9cf189f1 100644 --- a/rose-stem/site/meto/variables.cylc +++ b/rose-stem/site/meto/variables.cylc @@ -60,6 +60,17 @@ {% endif %} {% do LOG.info("Host EX: " + site_vars.host_ex) %} +{# Set node type target #} +{% if USE_GENOA is defined and USE_GENOA %} + {% do site_vars.update({"node_type": "genoa"}) %} + {% do site_vars.update({"node_cores": 192 }) %} + {% do site_vars.update({"node_mem": 720}) %} +{% else %} + {% do site_vars.update({"node_type": "milan"}) %} + {% do site_vars.update({"node_cores": 128 }) %} + {% do site_vars.update({"node_mem": 238}) %} +{% endif %} + {% do site_vars.update({"lfricinputs_kgo_base": "$UMDIR/standard_jobs/lfricinputs/kgo"}) %} {# Set fixed release version the KGO corresponds to #} diff --git a/rose-stem/templates/common_macros.cylc b/rose-stem/templates/common_macros.cylc index ed9efabee..9d166a54b 100644 --- a/rose-stem/templates/common_macros.cylc +++ b/rose-stem/templates/common_macros.cylc @@ -169,7 +169,7 @@ {# Remove a task prefix if listed above #} {% for prefix in task_prefixes %} {% if ns.str.startswith(prefix) %} - {% set ns.str = ns.str|replace(prefix~"_", "") %} + {% set ns.str = ns.str|replace(prefix~"_", "", 1) %} {% endif %} {% endfor %} diff --git a/rose-stem/templates/default_task_definitions.cylc b/rose-stem/templates/default_task_definitions.cylc index be9cabd69..93f479a55 100644 --- a/rose-stem/templates/default_task_definitions.cylc +++ b/rose-stem/templates/default_task_definitions.cylc @@ -234,4 +234,12 @@ {% do task_dict.update({"rose_ana_families": [task_ns.platform|upper~"_ROSE_ANA"]}) %} {% endif %} +{# ###################################### #} +{# MPI & LibFabric Configuration Settings #} +{# ###################################### #} + +{% if "CXI_MATCH_hybrid" not in task_dict %} + {% do task_dict.update({"CXI_MATCH_hybrid": false}) %} +{% endif %} + {% do LOG.debug("Finished in templates/default_task_definitions.cylc") %} diff --git a/rose-stem/templates/runtime/generate_runtime_application.cylc b/rose-stem/templates/runtime/generate_runtime_application.cylc index d7a2406b4..b9e507f15 100644 --- a/rose-stem/templates/runtime/generate_runtime_application.cylc +++ b/rose-stem/templates/runtime/generate_runtime_application.cylc @@ -71,7 +71,8 @@ 'echo $RELATIVE_LOG_ROOT > $TASK_OUTPUT_DIR/run.log.path', 'find . -regex ".*PET0+\..+\.Log" -exec cp {} $ROSE_TASK_LOG_DIR \;', 'find . -regex ".*PET0+\..+\.Log" -exec cp {} $TASK_OUTPUT_DIR \;', - 'find . -regex ".*PET0+\..+\.Log" -exec cat {} \;', + 'ls -sh $TASK_OUTPUT_DIR/results > $ROSE_TASK_LOG_DIR/results_file_sizes.log', + 'find . -name timer.txt -exec cp {} $ROSE_TASK_LOG_DIR \;', 'test -f '~task_values["app_name"]~ '-checksums.txt && cp $CYLC_TASK_WORK_DIR/'~ task_values["app_name"]~'-checksums.txt $TASK_OUTPUT_DIR/checksum.txt', @@ -144,6 +145,11 @@ {% if "memory_plot_ex" in task_values %} MEMORY_PROFILE = {{task_values["memory_plot_ex"]}} {% endif %} +{% if SITE~"-"~task_ns.platform == 'meto-ex1a' %} + {% if "CXI_MATCH_hybrid" in task_values and task_values["CXI_MATCH_hybrid"] %} + FI_CXI_RX_MATCH_MODE = "hybrid" + {% endif %} +{% endif %} {% if task_ns.application == "lfricinputs" %} {% include "templates/runtime/lfricinputs_task_environment.cylc" %}