Skip to content

Commit

Permalink
1 Provide automated scripts for benchmarking and visualizing results (#2
Browse files Browse the repository at this point in the history
)

- Creation of a script for benchmarking GMGPolar with MUMPS.
- Creation of Python utilities to extract LIKWID outputs and plot basic curves.
- Added some additional documentation.
  • Loading branch information
mknaranja authored Oct 6, 2023
1 parent ad33dde commit 6a0625f
Show file tree
Hide file tree
Showing 57 changed files with 518 additions and 93 deletions.
17 changes: 6 additions & 11 deletions batch.sh_paper
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,8 @@
debug=0
v1=1
v2=1
cycle=1
compute_rho=0
level=-1
maxiter=300
periodic=1
theta_aniso=0
discr=1
nr_exp=4
ntheta_exp=4
res_norm=3
R0=1e-6
DirBC_Interior=0
Expand Down Expand Up @@ -80,28 +73,30 @@ done
mkdir -p outputs

echo "prob alpha_coeff beta_coeff fac_ani extrapolation mod_pk"
# Triangular-Shafranov
for mod_pk in 2 1
# 1) Triangular/Czarny 2) Shafranov
for mod_pk in 2 1 # 2=Triangular/Czarny, 1=Shafranov
do
# Cartesian + beta 0 + ani 0
prob=7
prob=7 # Solution (23) of Bourne et al.
echo $prob $alpha_coeff $beta_coeff $fac_ani $extrapolation $mod_pk
for extrapolation in 1
do
for divideBy2 in 0 1 2 3 4 5 6 #iterate over the different grid sizes
do
# note that the divideBy2 option here is only used as a dummy for looping. Grids need to be stored beforehand and are loaded here.
echo "./${build_dir}/gmgpolar_simulation -n "$nr_exp" -a "$fac_ani" --mod_pk "$mod_pk" --DirBC_Interior "$DirBC_Interior" --divideBy2 0 -r "$R0" --smoother "$smoother" --verbose 2 --debug "$debug" --extrapolation "$extrapolation" --optimized 1 --openmp "$openmp" --v1 "$v1" --v2 "$v2" -R "$R" --prob "$prob" --maxiter "$maxiter" --alpha_coeff "$alpha_coeff" --beta_coeff "$beta_coeff" --res_norm "$res_norm" --f_grid_r radii_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt --f_grid_theta angles_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt --rel_red_conv "$rel_red_conv" 1> outputs/job.out_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt 2> outputs/job.err_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt"
./${build_dir}/gmgpolar_simulation -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother --verbose 2 --debug $debug --extrapolation $extrapolation --optimized 1 --openmp $openmp --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt" --rel_red_conv $rel_red_conv 1> "outputs/job.out_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt" 2> "outputs/job.err_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt"
done
done

# Polar + beta 0-1 + ani 0-1
prob=6
prob=6 # Solution (22) of Bourne et al.
echo $prob $alpha_coeff $beta_coeff $fac_ani $extrapolation $mod_pk
for extrapolation in 1
do
for divideBy2 in 0 1 2 3 4 5 6 #iterate over the different grid sizes
do
# note that the divideBy2 option here is only used as a dummy for looping. Grids need to be stored beforehand and are loaded here.
echo "./${build_dir}/gmgpolar_simulation -n "$nr_exp" -a "$fac_ani" --mod_pk "$mod_pk" --DirBC_Interior "$DirBC_Interior" --divideBy2 0 -r "$R0" --smoother "$smoother" --verbose 2 --debug "$debug" --extrapolation "$extrapolation" --optimized 1 --openmp "$openmp" --v1 "$v1" --v2 "$v2" -R "$R" --prob "$prob" --maxiter "$maxiter" --alpha_coeff "$alpha_coeff" --beta_coeff "$beta_coeff" --res_norm "$res_norm" --f_grid_r radii_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt --f_grid_theta angles_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt --rel_red_conv "$rel_red_conv" 1> outputs/job.out_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt 2> outputs/job.err_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt"
./${build_dir}/gmgpolar_simulation -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother --verbose 2 --debug $debug --extrapolation $extrapolation --optimized 1 --openmp $openmp --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"$divideBy2".txt" --rel_red_conv $rel_red_conv 1> "outputs/job.out_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt" 2> "outputs/job.err_"$fac_ani"_"$mod_pk"_"$prob"_"$beta_coeff"_"$extrapolation"_"$divideBy2"_"$rel_red_conv".txt"
done
Expand Down
19 changes: 12 additions & 7 deletions include/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,12 +350,14 @@ enum stencil
//const double PI = 3.141592653589793238463;
const double PI = M_PI;

// See Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249
// used as Param::mod_pk ("modified polar coordinates") in GMGPolar
enum geometry_type
{
CIRCULAR = 0,
SHAFRANOV = 1,
TRIANGULAR = 2,
CULHAM = 3
CIRCULAR = 0, // simple circular domain
SHAFRANOV = 1, // Fig. 6a
TRIANGULAR = 2, // Fig. 6b (also denoted Czarny)
CULHAM = 3 // Fig. 18
};

enum alpha_val
Expand All @@ -366,12 +368,15 @@ enum alpha_val
POISSON = 3,
};

// Defines the manufactured solution to compare the computed error against.
// see Kuehn et al. https://doi.org/10.1007/s10915-022-01802-1
// or Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249
enum problem_type
{
FLAT = 1,
REFINED_RADIUS = 4,
CARTESIAN_R2 = 5,
POLAR_R6 = 6,
CARTESIAN_R6 = 7,
CARTESIAN_R2 = 5, //
POLAR_R6 = 6, // Bourne et al., Eq. (22)
CARTESIAN_R6 = 7, // Bourne et al., Eq. (23)
};
#endif // CONSTANTS_HXX
137 changes: 106 additions & 31 deletions performance/run_gmgpolar.sh
Original file line number Diff line number Diff line change
@@ -1,58 +1,133 @@
#!/bin/bash
#SBATCH --job-name=gmgpolar-setup
#SBATCH --output=slurm-%A-setup.out
#SBATCH --error=slurm-%A-setup.err
#SBATCH -N 1
#SBATCH -n 1
#SBATCH -c 1
#SBATCH -t 5

#fixed variables
# If Origin is chosen, the node can be set as coarse or fine. Default: Coarse.
origin_NOT_coarse=0 # origin_NOT_coarse
# Choose anisotropy in angle-direction. Default: Off.
theta_aniso=0 # theta_aniso
# Smoother 3 is our default, 13 is used for some testing, should be not used
# for production. -> TODO: Rename smoother names
smoother=3 # smoother (3,13)

# changing variables ?!
prob=5 # prob
R=1.3 # R
kappa_eps=0 # k
delta_e=0 # d
discr=3 # discr
fac_ani=3 # a
nr_exp=4 # n
# default variables
# If origin is not a particular node of the mesh, Dirichlet boundary conditions
# can be implemented on the most inner circle
DirBC_Interior=1 # DirBC_Interior (0/1)
# Generalized radius of most inner circle. Defines if origin will be a particular node.
R0=1e-8 # r (1e-8/1e-5/1e-2)
# Generalized radius of maximum outer circle.
R=1.3 # R
# Anisotropy in radial direction.
fac_ani=3 # a
# TODO: which nr_exp and divideby2 do we want to consider?
nr_exp=4 # n

#changing variables
mod_pk=0 # mod_pk (0/1)
R0=0.1 # r (1e-8/1e-5/1e-2)
DirBC_Interior=0 # DirBC_Interior (0/1)
divideBy2=0 # divideBy2 (3/4/5/6)
smoother=3 # smoother (3,13)
extrapolation=0 # E
mod_pk=1 # mod_pk=1: Shafranov geometry
prob=7 # Prob=7: Simulate solution (23) of Bourne et al.
# TODO: which alpha and beta to simulate? Alpha aligned with anisotropy?
alpha_coeff=2
beta_coeff=1

# set to on
extrapolation=1 # E

debug=0
v1=1
v2=1
maxiter=300
res_norm=3
rel_red_conv=1e-11

nodes=1
ranks=1 # number of MPI Ranks
cores=128 # set OpenMP Num Threads to maximum number of cores requested

####################################
## create grids ##
####################################
# Set create_grid=1 to (re)generate the radii/angle grid files that the solve
# step below reads in. Grid creation is expensive and sequential, so it should
# only be run once per (R, fac_ani) combination.
create_grid=0

# The rest of this script (./build, radii_files/, angles_files/ and the
# generated sbatch file) works relative to the parent directory, so change
# into it independently of create_grid.
cd .. || exit 1

# NOTE: `[ $create_grid ]` only tests for a non-empty string and is therefore
# also true for create_grid=0; compare against the value explicitly.
if [ "$create_grid" = "1" ]
then
    mkdir -p "angles_files/Rmax${R}/aniso${fac_ani}/"
    mkdir -p "radii_files/Rmax${R}/aniso${fac_ani}/"
    for divideBy2 in 0 1 2 3 4 5 6 7 8 # create different grid sizes
    do
        ## ATTENTION / REMARK:
        ## Please note that these calls will abort/segfault as creation of grids and computation in one step
        ## is not yet supported by GMGPolar. We will make this functionality available in a future commit.
        ## Please ignore abort/segfault for the calls in this loop.
        # mod_pk has no effect on the creation of grids as the set of (r,theta) is
        # the same for all geometries, only the mapping F(r,theta) -> (x,y) changes.
        ./build/gmgpolar_simulation -n "$nr_exp" -a "$fac_ani" --mod_pk 0 \
            --DirBC_Interior "$DirBC_Interior" --divideBy2 "$divideBy2" -r "$R0" \
            --smoother "$smoother" --verbose 2 --debug "$debug" \
            --extrapolation "$extrapolation" --optimized 1 \
            --v1 "$v1" --v2 "$v2" -R "$R" --prob "$prob" --maxiter "$maxiter" \
            --alpha_coeff "$alpha_coeff" --beta_coeff "$beta_coeff" \
            --res_norm "$res_norm" --write_radii_angles 1 \
            --f_grid_r "radii_files/Rmax${R}/aniso${fac_ani}/divide${divideBy2}.txt" \
            --f_grid_theta "angles_files/Rmax${R}/aniso${fac_ani}/divide${divideBy2}.txt"
    done
fi

echo "#!/bin/bash" > run_gmgpolar_sbatch.sh
# create a short name for your job
echo "#SBATCH --job-name=gmgpolar" >> run_gmgpolar_sbatch.sh
# stdout file %A=job id
echo "#SBATCH --output=slurm-%A-p$prob-fa$fac_ani-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation.out" >> run_gmgpolar_sbatch.sh
echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh
# stderr file
echo "#SBATCH --error=slurm-%A-p$prob-fa$fac_ani-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation.err" >> run_gmgpolar_sbatch.sh
echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh

echo "#SBATCH -N 1" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -n 1" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -c 14" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -t 6000" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -N $nodes" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -n $ranks" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -c $cores" >> run_gmgpolar_sbatch.sh
# fix to one thread per core
echo "#SBATCH --threads-per-core=1" >> run_gmgpolar_sbatch.sh
# fix CPU frequency to 1.8 GHz (Slurm's --cpu-freq takes the value in kHz)
echo "#SBATCH --cpu-freq=1800000" >> run_gmgpolar_sbatch.sh
echo "#SBATCH -t 600" >> run_gmgpolar_sbatch.sh
echo "#SBATCH --exclusive" >> run_gmgpolar_sbatch.sh

# remove potentially loaded and conflicting modules
echo "module purge" >> run_gmgpolar_sbatch.sh

# gcc10
echo "module load PrgEnv/gcc10-openmpi" >> run_gmgpolar_sbatch.sh
# CARO
#echo "module load rev/23.05" >> run_gmgpolar_sbatch.sh
# spack install mumps@XXX+metis~mpi
echo "module load likwid/5.2.2" >> run_gmgpolar_sbatch.sh
# Local machine
# echo "module load PrgEnv/gcc10-openmpi" >> run_gmgpolar_sbatch.sh

# echo "cd .." >> run_gmgpolar_sbatch.sh
# echo "make -j16" >> run_gmgpolar_sbatch.sh

# to be defined for use case (3/4/5/6)
# Attention: divideBy is used as a dummy variable to access folders as grids are read in
echo "let divideBy2=4" >> run_gmgpolar_sbatch.sh

echo "cd .." >> run_gmgpolar_sbatch.sh
echo "make -j16" >> run_gmgpolar_sbatch.sh
####################################
## solve system ##
####################################

# FLOPS-DP counter
echo "for m in {0..1}; do" >> run_gmgpolar_sbatch.sh
echo "likwid-perfctr -C 0-$m -g FLOPS_DP ./build_gnu/main --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 $divideBy2 -r $R0 --smoother $smoother -E $extrapolation" >> run_gmgpolar_sbatch.sh
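# upper bound for the thread counts m = 1, 2, 4, ... tested below; the 0-based
# core list passed to likwid (0..m-1) is computed inside the generated loop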
max_threads=$((cores))
echo "let m=1" >> run_gmgpolar_sbatch.sh
# FLOPS-DP counter from 1 to cores many threads
echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh
echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh
echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh
echo "let m=m*2" >> run_gmgpolar_sbatch.sh
echo "done;" >> run_gmgpolar_sbatch.sh

# memory (saturation) benchmarks
echo "for m in {0..1}; do" >> run_gmgpolar_sbatch.sh
echo "likwid-perfctr -C 0-$m -g CACHES ./build_gnu/main -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 $divideBy2 -r $R0 --smoother $smoother -E $extrapol" >> run_gmgpolar_sbatch.sh
# # Memory (saturation) benchmarks
echo "let m=1" >> run_gmgpolar_sbatch.sh
echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh
echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh
echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g CACHE ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh
echo "let m=m*2" >> run_gmgpolar_sbatch.sh
echo "done;" >> run_gmgpolar_sbatch.sh

#submit the job
sbatch run_gmgpolar_sbatch.sh
sbatch run_gmgpolar_sbatch.sh
118 changes: 118 additions & 0 deletions plot_tools/plot_benchmarks_scaling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import pandas as pd
import numpy as np
import sys
# Use backend to not plot on UI
# import matplotlib
# matplotlib.use('Agg')
import matplotlib.pyplot as plt
from os.path import join, exists, dirname
from os import makedirs

### Plots the scaling of the FLOPS and CACHES (memory bandwidth saturation)
### benchmarks over the number of cores used, as read from a data frame.

# Palette of twelve RGB triples (components in [0, 1]); it is the default
# value of the ``colors`` parameter of plot_perf_per_core.
colors = [
    [1.00, 0.49, 0.06],
    [0.17, 0.63, 0.18],
    [0.83, 0.15, 0.16],
    [0.13, 0.47, 0.69],
    [0.58, 0.40, 0.74],
    [0.53, 0.35, 0.27],
    [0.92, 0.46, 0.77],
    [0.50, 0.50, 0.50],
    [0.66, 0.85, 0.06],
    [0.06, 0.85, 0.85],
    [0.85, 0.15, 0.85],
    [0.75, 0.75, 0.75],
]

def plot_perf_per_core(path_out, fname, df, benchname, saturation_limit=0, colors=colors):
    """Plot one benchmark column against the number of cores and save the figure.

    Args:
        path_out: Directory in which a 'figures' subdirectory is created
            (if missing) to hold the output.
        fname: Prefix of the output file name; '_' and the lower-cased
            benchmark name are appended.
        df: Data frame providing a 'Cores' column and a column named
            ``benchname[0]``.
        benchname: Pair ``(column name, [plot title, y-axis label])``, e.g. an
            item of a ``{column: [title, ylabel]}`` dictionary.
        saturation_limit: Height of the dotted reference line that is drawn
            only for the 'CACHES' benchmark (labelled in GBytes/s).
        colors: Currently unused; kept so that existing calls stay valid.
    """
    fontsize = 16

    column = benchname[0]
    labels = benchname[1]
    title = labels[0]
    # Fix: the y-axis used to be labelled with the plot title (labels[0]).
    # Fall back to the title if no separate y-label was provided.
    ylabel = labels[1] if len(labels) > 1 else labels[0]

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    plt.plot(df['Cores'], df[column])

    if column == 'CACHES':
        # Horizontal reference line marking the memory bandwidth limit.
        plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])),
                 linestyle='dotted', linewidth=3, color=[0, 0, 0])
        ax.text(1, saturation_limit + 3,
                'Memory bandwidth (AXPY) (' + str(saturation_limit) + ' GBytes/s)',
                fontsize=14)
        ax.set_ylim(0, 90)

    ax.set_title(title, fontsize=fontsize + 6)
    ax.set_ylabel(ylabel, fontsize=fontsize)
    ax.set_xlabel('Number of cores used', fontsize=fontsize)

    path_out = join(path_out, 'figures')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    makedirs(path_out, exist_ok=True)
    # No file extension is given, so matplotlib's default format is used.
    plt.savefig(join(path_out, fname + '_' + column.lower()), bbox_inches='tight')
    plt.close()


def main():
    """Read the benchmark table of one run and plot its scaling curves.

    The run is identified by the hard-coded settings below, which form the
    name of the CSV file ('<fname>_benchmarks.csv') that is looked up relative
    to the directory of this script. For every LIKWID benchmark column that
    contains values, these are plotted against the number of cores; for the
    first LIKWID benchmark the timing columns are plotted as well. The script
    exits with an error message if the selected rows belong to more than one
    setting or if a core count occurs more than once.
    """
    # Settings of the run to be plotted; they make up the CSV file name.
    problem = 5
    nr_exp = 4
    mod_pk = 1
    smoother = 3
    extrapolation = 1

    nodes = 1
    ranks = 1
    maxCores = 14

    ## saturation_limit is node specific and needs to be adapted.
    saturation_limit = 80

    fname = ''.join([
        'p', str(problem),
        '-r', str(nr_exp),
        '-mpk', str(mod_pk),
        '-s', str(smoother),
        '-e', str(extrapolation),
        '--N', str(nodes),
        '-R', str(ranks),
        '-maxC', str(maxCores),
    ])
    path_to_files_rel = ''  # relative to plot script
    path_to_files = join(dirname(__file__), path_to_files_rel)

    csv_file = join(path_to_files, fname + '_benchmarks.csv')
    column_types = {'Problem': int, 'rExp': int, 'ModPK': int,
                    'Extrapolation': int, 'Nodes': int, 'Ranks': int,
                    'Cores': int, 'its': int}
    df = pd.read_csv(csv_file, dtype=column_types)

    # Likwid benchmark columns, more benchmarks are in timings.
    # benchmark : [plot title, plot y-label]
    likwid_benchmarks = {
        'FLOPS_DP': ['Flop performance in Multi-Threading', 'Flops (GFlops/s)'],
        'CACHES': ['Memory bandwidth saturation', 'Memory bandwidth (GBytes/s)'],
    }
    timing_benchmarks = {
        'Total_execution_time': ['Total execution time in Multi-Threading', 'Execution time'],
    }
    # Problem setting columns
    setting_cols = ['Problem', 'rExp', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks']

    first_likwid_bench = list(likwid_benchmarks.keys())[0]

    for bench in likwid_benchmarks.items():
        bench_column = bench[0]

        # Keep only the rows in which this benchmark was actually measured.
        has_value = ~df[bench_column].isnull()
        df_subframe = df[has_value].copy()
        if len(df_subframe) == 0:
            continue

        # The runs with different numbers of threads/cores must all belong to
        # one particular setting of the columns above.
        settings_per_column = df_subframe.loc[:, setting_cols].nunique()
        if np.max(settings_per_column) > 1:
            sys.exit('Error in input data, more than one setting found.')

        # TODO or not TODO: If the same run was done multiple times with different LIKWID benchmarks
        # it is not clear which line to take.
        # This could be extended to take the weighted sum or minimum of all corresponding lines.
        # However, these timings should be identical or in the same region...
        # Nonetheless, there could be a nicer table format to store the results ;-)
        core_counts = df_subframe['Cores']
        if len(core_counts.unique()) != len(core_counts):
            sys.exit('Error: Multiple times computed with the same number of threads.')

        plot_perf_per_core(path_to_files, fname, df_subframe, bench,
                           saturation_limit=saturation_limit)

        # Plot particular timings from table for first benchmark only.
        # Timings should be similar for FLOPS_DP and CACHES.
        if bench_column == first_likwid_bench:
            for timing_benchmark in timing_benchmarks.items():
                plot_perf_per_core(path_to_files, fname, df_subframe, timing_benchmark)


# Run the plotting workflow only when this file is executed as a script.
if __name__ == '__main__':
    main()

Loading

0 comments on commit 6a0625f

Please sign in to comment.