Provide automated scripts for benchmarking and visualizing results (#2)
- Creation of a script for benchmarking GMGPolar with MUMPS.
- Creation of Python utilities to extract LIKWID outputs and plot basic curves.
- Added some additional documentation.
Showing 57 changed files with 518 additions and 93 deletions.
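The LIKWID extraction utilities mentioned in the commit message are not shown in this section. As a rough, hypothetical sketch of that kind of post-processing (the function and file names are illustrative, not the utilities added by this commit), one could pull a metric such as FLOPS_DP out of a `likwid-perfctr` text report like this:

```python
import re

def extract_metric(report_path, metric="DP [MFLOP/s]"):
    """Return the first value found for `metric` in a likwid-perfctr text report.

    Assumes the report contains table rows such as
    '|  DP [MFLOP/s]  |  1234.56  |' (single core) or a
    '...  STAT' row whose first value is the sum over cores.
    """
    pattern = re.compile(r"\|\s*" + re.escape(metric) + r"(?:\s+STAT)?\s*\|\s*([0-9.eE+-]+)")
    with open(report_path) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                return float(match.group(1))
    return None

# Hypothetical usage: one record per core count, to be collected into a CSV later.
# rows = [{'Cores': c, 'FLOPS_DP': extract_metric(f'slurm-123-c{c}.out')} for c in (1, 2, 4)]
```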
@@ -1,58 +1,133 @@
#!/bin/bash
#SBATCH --job-name=gmgpolar-setup
#SBATCH --output=slurm-%A-setup.out
#SBATCH --error=slurm-%A-setup.err
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH -t 5

# fixed variables
# If the origin is chosen as a node, it can be set as coarse or fine. Default: coarse.
origin_NOT_coarse=0 # origin_NOT_coarse
# Choose anisotropy in angle-direction. Default: off.
theta_aniso=0 # theta_aniso
# Smoother 3 is our default; 13 is used for some testing and should not be used
# for production. -> TODO: Rename smoother names
smoother=3 # smoother (3,13)

# changing variables ?!
prob=5 # prob
R=1.3 # R
kappa_eps=0 # k
delta_e=0 # d
discr=3 # discr
fac_ani=3 # a
nr_exp=4 # n
# default variables
# If the origin is not a particular node of the mesh, Dirichlet boundary conditions
# can be imposed on the innermost circle.
DirBC_Interior=1 # DirBC_Interior (0/1)
# Generalized radius of the innermost circle. Defines whether the origin will be a particular node.
R0=1e-8 # r (1e-8/1e-5/1e-2)
# Generalized radius of the outermost circle.
R=1.3 # R
# Anisotropy in radial direction.
fac_ani=3 # a
# TODO: which nr_exp and divideBy2 do we want to consider?
nr_exp=4 # n

# changing variables
mod_pk=0 # mod_pk (0/1)
R0=0.1 # r (1e-8/1e-5/1e-2)
DirBC_Interior=0 # DirBC_Interior (0/1)
divideBy2=0 # divideBy2 (3/4/5/6)
smoother=3 # smoother (3,13)
extrapolation=0 # E
mod_pk=1 # mod_pk=1: Shafranov geometry
prob=7 # prob=7: simulate solution (23) of Bourne et al.
# TODO: which alpha and beta to simulate? Alpha aligned with anisotropy?
alpha_coeff=2
beta_coeff=1

# set to on
extrapolation=1 # E

debug=0
v1=1
v2=1
maxiter=300
res_norm=3
rel_red_conv=1e-11

nodes=1
ranks=1 # number of MPI ranks
cores=128 # set OpenMP num threads to the maximum number of cores requested

####################################
##          create grids          ##
####################################
create_grid=0
if [ "$create_grid" -eq 1 ]
then
    cd ..
    mkdir -p angles_files/Rmax"$R"/aniso"$fac_ani"/
    mkdir -p radii_files/Rmax"$R"/aniso"$fac_ani"/
    # Costly, as the setup is expensive and sequential. Only run once.
    for divideBy2 in 0 1 2 3 4 5 6 7 8 # create different grid sizes
    do
        ## ATTENTION / REMARK:
        ## Please note that these calls will abort/segfault, as creating grids and computing in one step
        ## is not yet supported by GMGPolar. We will make this functionality available in a future commit.
        ## Please ignore the abort/segfault for the calls in this loop.
        # mod_pk has no effect on the creation of grids, as the set of (r,theta) is
        # the same for all geometries; only the mapping F(r,theta) -> (x,y) changes.
        ./build/gmgpolar_simulation -n $nr_exp -a $fac_ani --mod_pk 0 --DirBC_Interior $DirBC_Interior --divideBy2 $divideBy2 -r $R0 --smoother $smoother --verbose 2 --debug $debug --extrapolation $extrapolation --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --write_radii_angles 1 --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt"
    done
fi

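# The radii/angles files written above with --write_radii_angles 1 are reused below
# (inside the generated job script) via --f_grid_r / --f_grid_theta.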
echo "#!/bin/bash" > run_gmgpolar_sbatch.sh | ||
# create a short name for your job | ||
echo "#SBATCH --job-name=gmgpolar" >> run_gmgpolar_sbatch.sh | ||
# stdout file %A=job id | ||
echo "#SBATCH --output=slurm-%A-p$prob-fa$fac_ani-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation.out" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh | ||
# stderr file | ||
echo "#SBATCH --error=slurm-%A-p$prob-fa$fac_ani-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation.err" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh | ||
|
||
echo "#SBATCH -N 1" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -n 1" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -c 14" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -t 6000" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -N $nodes" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -n $ranks" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -c $cores" >> run_gmgpolar_sbatch.sh | ||
# fix to one thread per core | ||
echo "#SBATCH --threads-per-core=1" >> run_gmgpolar_sbatch.sh | ||
# fix CPU frequency to 1.8 Mhz | ||
echo "#SBATCH --cpu-freq=1800000" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH -t 600" >> run_gmgpolar_sbatch.sh | ||
echo "#SBATCH --exclusive" >> run_gmgpolar_sbatch.sh | ||
|
||
# remove potentially loaded and conflicting modules
echo "module purge" >> run_gmgpolar_sbatch.sh

# gcc10
echo "module load PrgEnv/gcc10-openmpi" >> run_gmgpolar_sbatch.sh
# CARO
#echo "module load rev/23.05" >> run_gmgpolar_sbatch.sh
# spack install mumps@XXX+metis~mpi
echo "module load likwid/5.2.2" >> run_gmgpolar_sbatch.sh
# Local machine
# echo "module load PrgEnv/gcc10-openmpi" >> run_gmgpolar_sbatch.sh

# echo "cd .." >> run_gmgpolar_sbatch.sh
# echo "make -j16" >> run_gmgpolar_sbatch.sh

# to be defined for the use case (3/4/5/6)
# Attention: divideBy2 is used as a dummy variable to access the folders, as the grids are read in from file
echo "let divideBy2=4" >> run_gmgpolar_sbatch.sh

echo "cd .." >> run_gmgpolar_sbatch.sh | ||
echo "make -j16" >> run_gmgpolar_sbatch.sh | ||
#################################### | ||
## solve system ## | ||
#################################### | ||
|
||
# FLOPS-DP counter | ||
echo "for m in {0..1}; do" >> run_gmgpolar_sbatch.sh | ||
echo "likwid-perfctr -C 0-$m -g FLOPS_DP ./build_gnu/main --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 $divideBy2 -r $R0 --smoother $smoother -E $extrapolation" >> run_gmgpolar_sbatch.sh | ||
# reduce cores as cores count from 0 | ||
max_threads=$((cores)) | ||
echo "let m=1" >> run_gmgpolar_sbatch.sh | ||
# FLOPS-DP counter from 1 to cores many threads | ||
echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh | ||
echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh | ||
echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh | ||
echo "let m=m*2" >> run_gmgpolar_sbatch.sh | ||
echo "done;" >> run_gmgpolar_sbatch.sh | ||
|
||
# memory (saturation) benchmarks
echo "for m in {0..1}; do" >> run_gmgpolar_sbatch.sh
echo "likwid-perfctr -C 0-$m -g CACHES ./build_gnu/main -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 $divideBy2 -r $R0 --smoother $smoother -E $extrapol" >> run_gmgpolar_sbatch.sh
# # Memory (saturation) benchmarks
echo "let m=1" >> run_gmgpolar_sbatch.sh
echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh
echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh
echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g CACHE ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh
echo "let m=m*2" >> run_gmgpolar_sbatch.sh
echo "done;" >> run_gmgpolar_sbatch.sh

# submit the job
sbatch run_gmgpolar_sbatch.sh
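The plotting utility in the next file reads its input from a file named `<fname>_benchmarks.csv`. As a minimal sketch of a compatible table (column names taken from the script below; the numbers are placeholders, not measured results), such a file could be produced with pandas:

```python
import pandas as pd

# One row per core count for a single problem setting; values are illustrative only.
df = pd.DataFrame({
    'Problem': [5, 5, 5], 'rExp': [4, 4, 4], 'ModPK': [1, 1, 1],
    'Extrapolation': [1, 1, 1], 'Nodes': [1, 1, 1], 'Ranks': [1, 1, 1],
    'Cores': [1, 2, 4], 'its': [25, 25, 25],
    'FLOPS_DP': [2.1, 4.0, 7.6],                # GFlops/s
    'CACHES': [12.0, 23.0, 41.0],               # GBytes/s
    'Total_execution_time': [120.0, 65.0, 36.0],
})
df.to_csv('p5-r4-mpk1-s3-e1--N1-R1-maxC14_benchmarks.csv', index=False)
```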
@@ -0,0 +1,118 @@
import pandas as pd
import numpy as np
import sys
# Use a non-interactive backend to avoid plotting in the UI
# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
from os.path import join, exists, dirname
from os import makedirs

### Plot the scaling of the FLOPS and cache (saturation) benchmarks over 1 to n cores,
### as read from the data frame.

colors = [
    [1.00, 0.49, 0.06],
    [0.17, 0.63, 0.18],
    [0.83, 0.15, 0.16],
    [0.13, 0.47, 0.69],
    [0.58, 0.40, 0.74],
    [0.53, 0.35, 0.27],
    [0.92, 0.46, 0.77],
    [0.50, 0.50, 0.50],
    [0.66, 0.85, 0.06],
    [0.06, 0.85, 0.85],
    [0.85, 0.15, 0.85],
    [0.75, 0.75, 0.75]]

def plot_perf_per_core(path_out, fname, df, benchname, saturation_limit=0, colors=colors):
    fontsize = 16

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    plt.plot(df['Cores'], df[benchname[0]])

    if benchname[0] == 'CACHES':
        plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])), linestyle='dotted', linewidth=3, color=[0, 0, 0])
        ax.text(1, saturation_limit + 3, 'Memory bandwidth (AXPY) (' + str(saturation_limit) + ' GBytes/s)', fontsize=14)
        ax.set_ylim(0, 90)

    ax.set_title(benchname[1][0], fontsize=fontsize + 6)
    ax.set_ylabel(benchname[1][1], fontsize=fontsize)

    ax.set_xlabel('Number of cores used', fontsize=fontsize)

    path_out = join(path_out, 'figures')
    if not exists(path_out):
        makedirs(path_out)
    plt.savefig(join(path_out, fname + '_' + benchname[0].lower()), bbox_inches='tight')
    plt.close()

def main():

    problem = 5
    nr_exp = 4
    mod_pk = 1
    smoother = 3
    extrapolation = 1

    nodes = 1
    ranks = 1
    maxCores = 14

    ## saturation_limit is node specific and needs to be adapted.
    saturation_limit = 80
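    ## (One way to estimate this limit is a streaming kernel, e.g. likwid-bench with a
    ## daxpy/triad test; the 80 GBytes/s used here corresponds to the AXPY memory
    ## bandwidth annotated in the CACHES plot.)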

    fname = 'p' + str(problem) + '-r' + str(nr_exp) + '-mpk' + str(mod_pk) + '-s' + str(
        smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores)
    path_to_files_rel = ''  # relative to plot script
    path_to_files = join(dirname(__file__), path_to_files_rel)

    df = pd.read_csv(
        join(path_to_files, fname + '_benchmarks.csv'),
        dtype={'Problem': int, 'rExp': int, 'ModPK': int,
               'Extrapolation': int, 'Nodes': int, 'Ranks': int,
               'Cores': int, 'its': int})

    # LIKWID benchmark columns; more benchmarks are in the timings.
    likwid_benchmarks = {'FLOPS_DP': ['Flop performance in Multi-Threading', 'Flops (GFlops/s)'],
                         'CACHES': ['Memory bandwidth saturation', 'Memory bandwidth (GBytes/s)']}  # benchmark : [plot title, plot y-label]
    timing_benchmarks = {'Total_execution_time': ['Total execution time in Multi-Threading', 'Execution time']}
    # Problem setting columns
    setting_cols = ['Problem', 'rExp', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks']

    # check content
    for bench in likwid_benchmarks.items():
        bench_rows = np.where(df[bench[0]].isnull() != True)[0]
        if len(bench_rows) > 0:
            df_subframe = df.iloc[bench_rows].copy()

            # Check that the runs with different numbers of threads/cores were all
            # conducted with one particular setting of the above columns.
            if np.max(df_subframe.loc[:, setting_cols].nunique()) > 1:
                sys.exit('Error in input data, more than one setting found.')

            # TODO or not TODO: If the same run was done multiple times with different LIKWID benchmarks,
            # it is not clear which line to take.
            # This could be extended to take the weighted sum or minimum of all corresponding lines.
            # However, these timings should be identical or at least in the same range...
            # Nonetheless, there could be a nicer table format to store the results ;-)
            cores_used = df_subframe['Cores'].unique()
            if len(cores_used) != len(df_subframe['Cores']):
                sys.exit('Error: multiple runs with the same number of threads.')

            plot_perf_per_core(path_to_files, fname, df_subframe, bench, saturation_limit=saturation_limit)

            # Plot particular timings from the table for the first benchmark only;
            # timings should be similar for FLOPS_DP and CACHES.
            if bench[0] == list(likwid_benchmarks.keys())[0]:
                for timing_benchmark in timing_benchmarks.items():
                    plot_perf_per_core(path_to_files, fname, df_subframe, timing_benchmark)


if __name__ == '__main__':
    main()
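A direct call to `plot_perf_per_core`, bypassing `main()`, can be useful for quick checks. The sketch below assumes the module is importable under the hypothetical name `plot_scaling` and that the data frame holds a single setting with a `Cores` column and the benchmark column:

```python
import pandas as pd
from plot_scaling import plot_perf_per_core  # module name is an assumption

# Illustrative values only; one row per core count of a single setting.
df = pd.DataFrame({'Cores': [1, 2, 4], 'FLOPS_DP': [2.1, 4.0, 7.6]})
plot_perf_per_core(
    '.',                                       # figures land in ./figures
    'p5-r4-mpk1-s3-e1--N1-R1-maxC14',          # file name prefix as built in main()
    df,
    ('FLOPS_DP', ['Flop performance in Multi-Threading', 'Flops (GFlops/s)']),
    saturation_limit=80,
)
```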