diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f946a116 --- /dev/null +++ b/.gitignore @@ -0,0 +1,273 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/.pnp +.pnp.js + +# misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local +*.xls +*.xlsx +*.zip +*.tar.gz +*.csv +*slurm* + +# Created by https://www.gitignore.io/api/c++,node,python +# Edit at https://www.gitignore.io/?templates=c++,node,python + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# TypeScript v1 declaration files +typings/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.test + +# parcel-bundler cache 
(https://parceljs.org/) +.cache + +# next.js build output +.next + +# nuxt.js build output +.nuxt + +# rollup.js default build output +dist/ + +# Uncomment the public line if your project uses Gatsby +# https://nextjs.org/blog/next-9-1#public-directory-support +# https://create-react-app.dev/docs/using-the-public-folder/#docsNav +# public + +# Storybook build outputs +.out +.storybook-out + +# vuepress build output +.vuepress/dist + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# Temporary folders +tmp/ +temp/ + +### Python ### +# Pycharm +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +_skbuild/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Docs +docs/html +docs/xml + +# End of https://www.gitignore.io/api/c++,node,python diff --git a/CMakeLists.txt b/CMakeLists.txt index 13143f65..57ca070b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project(GMGPolar VERSION 1.0.0) option(GMGPOLAR_BUILD_TESTS "Build GMGPolar unit tests." ON) option(GMGPOLAR_USE_MUMPS "Use MUMPS to compute matrix factorizations." OFF) +option(GMGPOLAR_USE_LIKWID "Use LIKWID to measure code (regions)." OFF) + set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -42,6 +44,18 @@ configure_file(${CMAKE_SOURCE_DIR}/include/config_internal.h.in ${CMAKE_SOURCE_D target_include_directories(gmgpolar_simulation PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) target_include_directories(GMGPolar PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) +if(GMGPOLAR_USE_LIKWID) + + find_package(LIKWID REQUIRED) + + target_include_directories(GMGPolar PUBLIC ${LIKWID_INCLUDE_DIRS}) + target_link_libraries(GMGPolar PUBLIC ${LIKWID_LIBRARIES}) + target_compile_definitions(GMGPolar PUBLIC "-DLIKWID_PERFMON") + +endif() + + + if(GMGPOLAR_USE_MUMPS) set(INC_DIRS diff --git a/include/config_internal.h.in b/include/config_internal.h.in index de594a47..3bf582e8 100644 --- a/include/config_internal.h.in +++ b/include/config_internal.h.in @@ -28,5 +28,6 @@ #define GMGPOLAR_CONFIG_INTERNAL_H #cmakedefine GMGPOLAR_USE_MUMPS +#cmakedefine GMGPOLAR_USE_LIKWID #endif // GMGPOLAR_CONFIG_INTERNAL_H diff --git a/include/constants.h b/include/constants.h index 
3588e4aa..8083cce2 100644 --- a/include/constants.h +++ b/include/constants.h @@ -245,7 +245,7 @@ domains with the method of characteristics and spline finite elements. (2019) * * Defines the coefficient * 0: beta = 0 - * 1: beta = (1 / alpha) + * 1: beta: 1/alpha for some cases, different in others (see coeffs in test_cases) */ beta_coeff, /*! \brief Norm for stopping criterion @@ -316,8 +316,8 @@ enum dcontrols rel_red_conv, /*! \brief Timings */ - t_coeff, - t_arr_art_att, + t_arr_art_att, // Evaluation of arr, art, and att + t_coeff, // Evaluation of alpha and beta (subcounter of t_arr_art_att) t_sol, t_detDFinv, t_trafo, diff --git a/include/gmgpolar.h b/include/gmgpolar.h index 9de56530..50e3cc1d 100644 --- a/include/gmgpolar.h +++ b/include/gmgpolar.h @@ -45,13 +45,16 @@ #include "constants.h" #include "level.h" #include "gyro.h" +#ifdef GMGPOLAR_USE_LIKWID +#include <likwid.h> +#endif class gmgpolar { public: -/******************************************************************************* - * Attributes - ******************************************************************************/ + /******************************************************************************* + * Attributes + ******************************************************************************/ /*************************************************************************** * Grid levels **************************************************************************/ @@ -67,25 +70,30 @@ std::vector<double> nrm_inf_res; /* execution times */ + // Setup double t_setup; // prepare_op_levels + // Subcounters of Setup double t_build; // build A and RHS double t_facto_Ac; // factorization of coarse operator double t_build_P; // build coarse nodes, line splitting and P double t_build_Asc; // build Asc and Asc_ortho double t_facto_Asc; // factorization of Asc - double t_total; // multigrid_cycle_extrapol + // Multigrid cycle + double t_total_mgcycle; // multigrid_cycle_extrapol + // Subcounter of Multigrid 
cycle double t_smoothing; // pre- and post-smoothing (including solve Asc) - double t_residual; // factorization of Asc + double t_residual; // computation of the residual double t_restriction; // restriction double t_Ac; // solve coarse operator double t_prolongation; // prolongation and coarse grid correction + double t_fine_residual; // computation of residual on level 0 (init and after iteration) double t_error; // computation of final error - double t_applyA; // apply the operator A, method apply_A + double t_applyA; // apply the operator A, method apply_A - /******************************************************************************* - * Methods - ******************************************************************************/ + /******************************************************************************* + * Methods + ******************************************************************************/ gmgpolar(); ~gmgpolar(); @@ -104,13 +112,13 @@ class gmgpolar double compute_backwarderror(); private: - /******************************************************************************* - * Attributes - ******************************************************************************/ +/******************************************************************************* +* Attributes +******************************************************************************/ - /******************************************************************************* - * Methods - ******************************************************************************/ +/******************************************************************************* +* Methods +******************************************************************************/ }; #endif // GMGPOLAR_HXX diff --git a/include/gyro.h b/include/gyro.h index 1ce578dd..e6e39608 100644 --- a/include/gyro.h +++ b/include/gyro.h @@ -66,8 +66,8 @@ namespace gyro * Attributes 
******************************************************************************/ /*************************************************************************** - * Controls and Informations - **************************************************************************/ +* Controls and Informations +**************************************************************************/ /*! The integer control array, see Controls::icontrols for the * possible values */ @@ -99,16 +99,16 @@ extern std::string f_sol_in, f_sol_out; * Methods ******************************************************************************/ /*************************************************************************** - * Parameters - **************************************************************************/ +* Parameters +**************************************************************************/ void init_params(); void show_params(); void get_geometry_coeffs(geometry_type geom); void select_functions_class(int alpha_coeff, int beta_coeff, int geom, int prob); /*************************************************************************** - * Boundary and solution - **************************************************************************/ +* Boundary and solution +**************************************************************************/ /************************ * Single ************************/ @@ -125,12 +125,12 @@ std::vector def_solution_rt(double r_i, std::vector theta, std:: std::vector cos_theta, int ntheta, int verbose); /*************************************************************************** - * Diffusivity and operator - **************************************************************************/ +* Diffusivity and operator +**************************************************************************/ /************************ * Single ************************/ -double coeff(double r, int verbose); +double coeff_alpha(double r, int verbose); double coeff_beta(double r, int verbose); double 
detDFinv(double r, double theta, int verbose); double arr(double r, double theta, int verbose); @@ -140,7 +140,7 @@ void arr_att_art(double r, double theta, double& arr, double& att, double& art, /************************ * Vector ************************/ -std::vector coeff(std::vector r, int verbose); +std::vector coeff_alpha(std::vector r, int verbose); std::vector coeff_beta(std::vector r, int verbose); std::vector detDFinv(double r, std::vector theta, std::vector sin_theta, std::vector cos_theta, int ntheta, int verbose); @@ -157,8 +157,8 @@ void arr_att_art(std::vector r, double theta, std::vector& arr, std::vector& art, int verbose); /*************************************************************************** - * Polar to cartesian and back - **************************************************************************/ +* Polar to cartesian and back +**************************************************************************/ /************************ * Single ************************/ @@ -207,8 +207,8 @@ void disp(std::vector a, const std::string& s_a) } /*************************************************************************** - * Matrix operations - **************************************************************************/ +* Matrix operations +**************************************************************************/ void sp_dgemv(int trans, int m, int n, double alpha, std::vector row_indices, std::vector col_indices, std::vector vals, int lda, std::vector x, int incx, double beta, std::vector& y, int incy); diff --git a/performance/run_gmgpolar.sh b/performance/run_gmgpolar.sh index f1460d45..04bf56f3 100644 --- a/performance/run_gmgpolar.sh +++ b/performance/run_gmgpolar.sh @@ -30,9 +30,8 @@ fac_ani=3 # a nr_exp=4 # n #changing variables -mod_pk=1 # mod_pk=1: Shafranov geometry -prob=7 # Prob=7: Simulate solution (23) of Bourne et al. -# TODO: which alpha and beta to simulate? Alpha aligned with anisotropy? 
+mod_pk=2 # mod_pk=2: Czarny geometry (mod_pk=1: Shafranov geometry) +prob=6 # prob=6 (prob=7: Simulate solution (23) of Bourne et al.) alpha_coeff=2 beta_coeff=1 @@ -75,10 +74,11 @@ fi echo "#!/bin/bash" > run_gmgpolar_sbatch.sh # create a short name for your job echo "#SBATCH --job-name=gmgpolar" >> run_gmgpolar_sbatch.sh +# Change fixed "dbt7" in file name if divideBy2 below is changed! # stdout file %A=job id -echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh +echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-dbt7-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh # stderr file -echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh +echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-dbt7-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh echo "#SBATCH -N $nodes" >> run_gmgpolar_sbatch.sh echo "#SBATCH -n $ranks" >> run_gmgpolar_sbatch.sh @@ -87,7 +87,8 @@ echo "#SBATCH -c $cores" >> run_gmgpolar_sbatch.sh echo "#SBATCH --threads-per-core=1" >> run_gmgpolar_sbatch.sh # fix CPU frequency to 1.8 Mhz echo "#SBATCH --cpu-freq=1800000" >> run_gmgpolar_sbatch.sh -echo "#SBATCH -t 600" >> run_gmgpolar_sbatch.sh +## Estimation on 2x AMD EPYC 7702: nr=4, dbt=7, prob=7, mpk=1 takes 720 minutes for 1->2->...->128 cores ## +echo "#SBATCH -t 1600" >> run_gmgpolar_sbatch.sh echo "#SBATCH --exclusive" >> run_gmgpolar_sbatch.sh # remove potentially loaded and conflicting modules @@ -103,9 +104,12 @@ echo "module load likwid/5.2.2" >> run_gmgpolar_sbatch.sh # echo "cd .." 
>> run_gmgpolar_sbatch.sh # echo "make -j16" >> run_gmgpolar_sbatch.sh +echo "# potentially run benchmark on machine" >> run_gmgpolar_sbatch.sh +echo "# srun --cpus-per-task=$((cores)) likwid-bench -i 1000 -t stream_avx_fma -w S0:500MB:64" >> run_gmgpolar_sbatch.sh + # to be defined for use case (3/4/5/6) # Attention: divideBy is used as a dummy variable to access folders as grids are read in -echo "let divideBy2=4" >> run_gmgpolar_sbatch.sh +echo "let divideBy2=7" >> run_gmgpolar_sbatch.sh #################################### ## solve system ## @@ -117,7 +121,11 @@ echo "let m=1" >> run_gmgpolar_sbatch.sh # FLOPS-DP counter from 1 to cores many threads echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh -echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh + +echo "# for testing that pin works correctly, potentially use likwid-pin beforehand" >> run_gmgpolar_sbatch.sh +echo "# srun --cpus-per-task=\$m likwid-pin -c N:0-\$mminus1 ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r 
"radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh + +echo "srun --cpus-per-task=\$m likwid-perfctr -f -m -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh echo "let m=m*2" >> run_gmgpolar_sbatch.sh echo "done;" >> run_gmgpolar_sbatch.sh @@ -125,7 +133,7 @@ echo "done;" >> run_gmgpolar_sbatch.sh echo "let m=1" >> run_gmgpolar_sbatch.sh echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh -echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g CACHE ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh +echo "srun --cpus-per-task=\$m likwid-perfctr -f -m -C 0-\$mminus1 -g MEM_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior 
--divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh echo "let m=m*2" >> run_gmgpolar_sbatch.sh echo "done;" >> run_gmgpolar_sbatch.sh diff --git a/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py b/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py new file mode 100644 index 00000000..84be55b6 --- /dev/null +++ b/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py @@ -0,0 +1,188 @@ +import pandas as pd +import numpy as np +import sys +# Use backend to not plot on UI +# import matplotlib +# matplotlib.use('Agg') +import matplotlib.pyplot as plt +from os.path import join, exists, dirname +from os import makedirs +import os.path + +### Plots scaling of FLOPS and Caches (saturation) scaling from 0 to n Cores +### as read from data frame + +colors = [ + [1.00, 0.49, 0.06], + [0.17, 0.63, 0.18], + [0.83, 0.15, 0.16], + [0.13, 0.47, 0.69], + [0.58, 0.40, 0.74], + [0.53, 0.35, 0.27], + [0.92, 0.46, 0.77], + [0.50, 0.50, 0.50], + [0.66, 0.85, 0.06], + [0.06, 0.85, 0.85], + [0.85, 0.15, 0.85], + [0.75, 0.75, 0.75], + [0.80, 0.67, 0.00], + [0.30, 0.65, 1.00], + [0.40, 0.07, 0.00], + [1.00, 0.80, 0.87], + [1.00, 0.87, 0.20], + [1.00, 0.80, 0.70], + [0.80, 0.87, 1.00]]; + +def plot_scaling(path_out, fname, benchname, df, title, ylabel, saturation_limit=0, logplot = True, colors=colors): + ''' + Plot different timings or other benchmarks against the number of cores used. + @param path_out Path to output files. + @param fname First part of filename. + @param benchname Benchmark name and postfix for filename. + @param df Data frame with results per core scaling. + @param title Plot title. 
+ @param ylabel Y-axis label. + @param saturation_limit Saturation limit to be plotted for MEM_DP scaling. + ''' + fontsize = 24 + + fig = plt.figure(figsize=(12, 10)) + # fig.subplots_adjust(bottom=0.3) # use for interactive plotting, for savefig, legend+figure is adjusted with bbox_inches='tight' + ax = fig.add_subplot() + start_optimal_line = df.loc[0,[timer for timer in df.iloc[:,1:].columns if 'Total' in timer]].max() + optimal_line = 1.0/df['Cores'].values * start_optimal_line + if logplot: + if benchname == 'Timings': + plt.loglog(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,1:].columns)): + col_label = df.columns[i+1] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.loglog(df['Cores'], df.iloc[:,i+1], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + else: + if benchname == 'Timings': + plt.plot(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,1:].columns)): + col_label = df.columns[i+1] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.plot(df['Cores'], df.iloc[:,i+1], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + + if benchname == 'MEM_DP': + if saturation_limit > 0: + plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])), linestyle='dotted', linewidth=3, color=[0, 0, 0]) + ax.text(1, saturation_limit + 3, 'Memory bandwith (AXPY) (' + str(saturation_limit) + ' GBytes/s)', fontsize=14) + ax.set_ylim(0, saturation_limit + 10) + + ax.legend(bbox_to_anchor=(0, 0, 1, -0.1), 
mode="expand", ncols=2, fontsize=fontsize-6) + # ax.set_title(title, fontsize=fontsize+6) + ax.set_ylabel(ylabel, fontsize=fontsize) + ax.set_xlabel('Number of cores', fontsize=fontsize) + ax.set_xticks(df['Cores']) + ax.set_xticklabels(df['Cores']) + + plt.rcParams['xtick.labelsize']=fontsize-6 + plt.rcParams['ytick.labelsize']=fontsize-6 + + path_out = join(path_out, 'figures') + if not exists(path_out): + makedirs(path_out) + plt.savefig(join(path_out, fname + '_' + benchname.lower()), dpi=300, bbox_inches='tight') + # plt.show() + plt.close() + + +def main(benchmarks=['FLOPS_DP'], problem=6, divideBy2=7, mod_pk=1, maxCores=128): + # maxCores simulated in scaling + + file_prefix = 'caro-paper' # provide correct slurm job id + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = '' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + + maxCoresPlot = 64 # maxCores to plot + plot_counter = {} # dict on which counter to plot + plot_counter['Total setup'] = 0 + plot_counter['Building system matrix A and RHS'] = 0 + plot_counter['Factorization of coarse operator Ac'] = 0 + plot_counter['Building intergrid operators (e.g. 
projections)'] = 0 + plot_counter['Building smoothing operators A_sc'] = 0 + plot_counter['Factorizing smoothing operators A_sc'] = 0 + plot_counter['Total multigrid cycle'] = 1 + plot_counter['Complete smoothing'] = 1 + plot_counter['Computing residual'] = 1 + plot_counter['Applying restriction'] = 1 + plot_counter['Solve coarse system'] = 1 + plot_counter['Applying prolongation (+ coarse grid correction)'] = 1 + plot_counter['Computing residual on finest level'] = 1 + plot_counter['Computing final error'] = 0 + plot_counter['Total application of A'] = 1 + plot_counter['Evaluation of arr, art, and att'] = 0 + plot_counter['Evaluation of alpha and beta'] = 0 + plot_counter['Computing determinant of Jacobian of inverse mapping'] = 0 + plot_counter['Computing exact solution'] = 0 + plot_counter['Total execution time'] = 0 + + plot_regions = {} # dict on which likwid region to plot + plot_regions['Setup'] = 1 + plot_regions['Iteration'] = 1 + + bench_to_unit = {} + bench_to_unit['FLOPS_DP'] = 'MFLOP/s' + bench_to_unit['MEM_DP'] = 'MBytes/s' + + ## saturation_limit for MEM_DP scaling (node specific and needs to be adapted). 
+ saturation_limit = 80 + + fname = file_prefix + 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) + file_postfix + + path_to_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_files_rel)) + + + # Problem setting columns + setting_cols = ['Problem', 'rExp', 'divB2', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks'] + + + df = [] + for bench in benchmarks: + df.append(pd.read_csv( + join(path_to_files, fname + '_' + bench + '.csv'), + dtype={'Problem': int, 'rExp': int, 'divB2': int, 'ModPK': int, + 'Extrapolation': int, 'Nodes': int, 'Ranks': int, + 'Cores': int, 'its': int})) + + # Check that the different number of threads/cores where only conducted + # on one particular setting of the above columns + if np.max(df[0].loc[:,setting_cols].nunique()) > 1: + sys.exit('Error in input data, more than one setting found.') + # check that only one row per number of cores is available + if df[0]['Cores'].nunique() != len(df[0]['Cores']): + sys.exit('Error: Multiple times computed with the same number of cores.') + + # check content + for i in range(len(df)): + # plot timings + plot_scaling(path_to_files, fname, 'Timings', df[i].loc[df[i]['Cores']<=maxCoresPlot, ['Cores'] + [k for k, v in plot_counter.items() if v==1]], 'Strong scaling of GMGPolar', 'Time (sec)') + # plot likwid benchmark + plot_scaling(path_to_files, fname, benchmarks[i], df[i].loc[df[i]['Cores']<=maxCoresPlot, ['Cores'] + [col for col in df[i].columns if bench_to_unit[benchmarks[i]] in col]], 'Strong scaling of GMGPolar', bench_to_unit[benchmarks[i]]) + + +if __name__ == '__main__': + # strong scaling + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=6, divideBy2=7, mod_pk=1) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], 
problem=6, divideBy2=7, mod_pk=2) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=7, divideBy2=7, mod_pk=1) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=7, divideBy2=7, mod_pk=2) + # large case strong scaling + main(benchmarks=['FLOPS_DP'], problem=7, divideBy2=8, mod_pk=1, maxCores=64) + \ No newline at end of file diff --git a/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb b/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb new file mode 100644 index 00000000..f3fcf599 --- /dev/null +++ b/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import LogNorm\n", + "import matplotlib.ticker as ticker\n", + "\n", + "# Define the curvilinear mapping\n", + "def xymap(r, t, mod_pk=1, par1=0.3, par2=0.2, x0=0, y0=0):\n", + " if mod_pk == 0:\n", + " x = r*np.cos(t)\n", + " y = r*np.sin(t)\n", + " elif mod_pk == 1:\n", + " kappa=par1\n", + " delta=par2\n", + " x = x0 + (1-kappa)*r*np.cos(t)-delta*r*r\n", + " y = y0 + (1+kappa)*r*np.sin(t)\n", + " elif mod_pk == 2:\n", + " epsilon=par1\n", + " lambdaa=par2\n", + " sq = np.sqrt(1+epsilon*(epsilon+2*r*np.cos(t)))\n", + " ksi = 1/np.sqrt(1-epsilon*epsilon/4)\n", + " x = (1/epsilon)*(1-sq)\n", + " y = y0 + lambdaa*ksi*r*np.sin(t)/(2-sq)\n", + " return x, y\n", + "\n", + "# Define the 2D function\n", + "def func(r, t, x, y, prob=6, mod_pk=1, C=1, Rmax=1, m=1):\n", + " if prob==6:\n", + " f=C*np.power(r/Rmax, 6)*np.power(r/Rmax-1, 6)*np.cos(m*t)\n", + " elif prob==7:\n", + " f=C*np.power(1+r/Rmax, 6)*np.power(1-r/Rmax, 6)*np.cos(2*np.pi*x)*np.sin(2*np.pi*y)\n", + " return f\n", + "\n", + "# Format numbers in scientific format\n", + "def fmt(x, pos):\n", + " # a, b = '{:.2e}'.format(x).split('e')\n", + " a, b = '{:.0e}'.format(x).split('e')\n", + " b = int(b)\n", + " return r'${} \\times 
10^{{{}}}$'.format(a, b)\n", + "\n", + "PROB=6\n", + "MOD_PK=1\n", + "C = 2**12 * 10**(-4)\n", + "M = 11\n", + "RMIN = 1e-5\n", + "RMAX = 1.3\n", + "KAPPA=0.3\n", + "DELTA=0.2\n", + "EPSILON=0.3\n", + "LAMBDA=1.4\n", + "\n", + "fontsize=18\n", + "\n", + "# Generate x and y values\n", + "N_INT = 1000\n", + "r = np.linspace(RMIN, RMAX, N_INT)\n", + "t = np.linspace(0, 2*np.pi, N_INT)\n", + "\n", + "# Create a meshgrid from x and y\n", + "R, T = np.meshgrid(r, t)\n", + "\n", + "for PROB in [6, 7]:\n", + " print(\"Prob:\",PROB)\n", + " for MOD_PK in [0,1,2]:\n", + " print(\"Geometry:\",MOD_PK)\n", + " if MOD_PK == 1:\n", + " par1=KAPPA\n", + " par2=DELTA\n", + " elif MOD_PK == 2:\n", + " par1=EPSILON\n", + " par2=LAMBDA\n", + " else:\n", + " par1=0\n", + " par2=0\n", + " \n", + " # Evaluate the function values for each point in the meshgrid\n", + " X, Y = xymap(R, T, mod_pk=MOD_PK, par1=par1, par2=par2)\n", + " Z = func(R, T, X, Y, prob=PROB, mod_pk=MOD_PK, C=C, Rmax=RMAX, m=M)\n", + " # print(\"X:\",np.min(X),np.max(X))\n", + " # print(\"Y:\",np.min(Y),np.max(Y))\n", + " # print(\"Z:\",np.min(Z),np.max(Z))\n", + "\n", + " # Plot the 2D function with logarithmic color scale\n", + " plt.contourf(X, Y, Z, cmap='coolwarm',levels=np.linspace(Z.min(),Z.max(),100))\n", + "\n", + " # Set limits on the x and y axes\n", + " if MOD_PK == 0:\n", + " plt.xlim(-1.4, 1.4)\n", + " plt.ylim(-1.4, 1.4)\n", + " elif MOD_PK == 1:\n", + " plt.xlim(-1.4, 0.7)\n", + " plt.ylim(-1.8, 1.8)\n", + " elif MOD_PK == 2:\n", + " plt.xlim(-1.4, 1.6)\n", + " plt.ylim(-2.2, 2.2)\n", + "\n", + " # Add colorbar\n", + " cbar = plt.colorbar() #format=ticker.FuncFormatter(fmt))\n", + " # format=ticker.ScalarFormatter()\n", + " # cbar.set_label('Solution')\n", + "\n", + " # Set equal aspect ratio for the plot\n", + " plt.gca().set_aspect('equal', adjustable='box')\n", + "\n", + " # Add labels and title\n", + " # plt.xlabel('X-axis')\n", + " # plt.ylabel('Y-axis')\n", + " # plt.title('2D Function with 
Logarithmic Color Scale')\n", + " \n", + " #plt.grid()\n", + " plt.rcParams['xtick.labelsize']=fontsize-4\n", + " plt.rcParams['ytick.labelsize']=fontsize-4 \n", + " \n", + " # Save figure\n", + " plt.savefig('sol_prob-'+str(PROB)+'_geom-'+str(MOD_PK)+'.png', bbox_inches='tight', dpi=300)\n", + "\n", + " # Show the plot\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Define the 1D functions\n", + "def alpha(r, Rmax=1.3, rp=0.7, deltar=0.05):\n", + " return np.exp(-np.tanh(((r/Rmax)-rp)/deltar))\n", + "\n", + "def beta(r, Rmax=1.3, rp=0.7, deltar=0.05):\n", + " return 1/alpha(r, Rmax=Rmax, rp=rp, deltar=deltar)\n", + "\n", + "RMIN=1e-5\n", + "RMAX=1.3\n", + "RP=0.7\n", + "DELTAR=0.05\n", + "\n", + "fontsize=18\n", + "\n", + "# Generate x values\n", + "r = np.linspace(RMIN, RMAX, 100)\n", + "\n", + "# Evaluate the function values\n", + "alpha_r = alpha(r, Rmax=RMAX, rp=RP, deltar=DELTAR)\n", + "beta_r = beta(r, Rmax=RMAX, rp=RP, deltar=DELTAR)\n", + "print(\"ALPHA:\",np.min(alpha_r),np.max(alpha_r))\n", + "print(\"BETA:\",np.min(beta_r),np.max(beta_r))\n", + "\n", + "# Plot the functions on the same plot\n", + "plt.plot(r, alpha_r, label='α(r)', linewidth=3)\n", + "plt.plot(r, beta_r, label='β(r)', linewidth=3)\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('r', fontsize=fontsize)\n", + "\n", + "# Add a legend\n", + "plt.legend(fontsize=fontsize)\n", + " \n", + "#plt.grid()\n", + "plt.rcParams['xtick.labelsize']=fontsize-4\n", + "plt.rcParams['ytick.labelsize']=fontsize-4\n", + "\n", + "# Save figure\n", + "plt.savefig('coeff.png', bbox_inches='tight', dpi=300)\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def flops_comp(nr, nt, m_c, it, mod_pk):\n", + " m = nr*nt\n", + " mu_c = 
m_c/m\n", + " # print(\"m\",m,\"m_c\",m_c,\"mu_c\",mu_c)\n", + " v1=1\n", + " v2=1\n", + " \n", + " # GLOBAL\n", + " if mod_pk == 0:\n", + " flops_DF = 6\n", + " elif mod_pk == 1:\n", + " flops_DF = 18\n", + " elif mod_pk == 2:\n", + " flops_DF = 112\n", + "\n", + " # SETUP\n", + " ## Flops\n", + " flops_setup_smoother_m = 2*flops_DF + 70 + 7*mu_c\n", + " flops_setup_m = 4/3*flops_setup_smoother_m\n", + " # print(\"flops_setup_m\",flops_setup_m)\n", + " \n", + " flops_setup = flops_setup_m * m\n", + " ## Mem\n", + " mem_setup_m = 3 + 2*mu_c\n", + " # print(\"mem_setup_m\",mem_setup_m)\n", + " \n", + " mem_setup = mem_setup_m * m\n", + " \n", + " # MG\n", + " ## Flops\n", + " flops_smoother_m = 2*flops_DF + 80 + 4*mu_c\n", + " flops_residual_m = flops_DF + 107.25\n", + " flops_m = (4*it)*((v1+v2)*flops_smoother_m + flops_residual_m)/3\n", + " # print(\"flops_m\",flops_m)\n", + " \n", + " flops = flops_m * m\n", + " ## Mem\n", + " mem_smoother_m = 2\n", + " mem_residual_m = 3\n", + " mem_m = 4*(mem_smoother_m + mem_residual_m)/3\n", + " # print(\"mem_m\",mem_m)\n", + " \n", + " mem = mem_m * m\n", + " \n", + " return flops_setup, mem_setup, flops, mem\n", + " \n", + " \n", + "MOD_PK=1\n", + "nr = [769, 1537, 3073, 6145]\n", + "nt = [1024, 2048, 4096, 8192]\n", + "m_c = [161, 321, 641, 1281]\n", + "it = [[70, 67, 64, 61], [56, 54, 51, 49]] # MOD_PK 1, MOD_PK 2\n", + "for MOD_PK in [1, 2]:\n", + " print(\"MOD_PK: \",MOD_PK)\n", + " print(\"nr,nt,m,m_c,it,flops_setup,mem_setup,flops,mem\")\n", + " for i in range(len(nr)):\n", + " flops_setup, mem_setup, flops, mem = flops_comp(nr[i], nt[i], m_c[i], it[MOD_PK-1][i], MOD_PK)\n", + " print(nr[i],\",\",nt[i],\",\",\"{:.2e}\".format(nr[i]*nt[i]),\",\",m_c[i],\",\",it[MOD_PK-1][i],\",\",\"{:.2e}\".format(flops_setup),\",\",\"{:.2e}\".format(mem_setup),\",\",\"{:.2e}\".format(flops),\",\",\"{:.2e}\".format(mem))\n", + " print(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/plot_tools/lskkr23_plot_weak_scaling.py b/plot_tools/lskkr23_plot_weak_scaling.py new file mode 100644 index 00000000..6c463aa7 --- /dev/null +++ b/plot_tools/lskkr23_plot_weak_scaling.py @@ -0,0 +1,119 @@ +import pandas as pd +import numpy as np +import sys +# Use backend to not plot on UI +# import matplotlib +# matplotlib.use('Agg') +import matplotlib.pyplot as plt +from os.path import join, exists, dirname +from os import makedirs +import os.path + + +# This file has been written and used to plot the read weak scaling results for +# Leleux, Schwarz, Kühn, Kruse, Rüde - Complexity analysis and scalability of a matrix-free extrapolated geometric multigrid (2023) +# In contrast to the other files with prefix lskkr23_*, it is written quick and dirty and only +# for weak scaling data collected from different strong scaling experiments. + +colors = [ + [1.00, 0.49, 0.06], + [0.17, 0.63, 0.18], + [0.83, 0.15, 0.16], + [0.13, 0.47, 0.69], + [0.58, 0.40, 0.74], + [0.53, 0.35, 0.27], + [0.92, 0.46, 0.77], + [0.50, 0.50, 0.50], + [0.66, 0.85, 0.06], + [0.06, 0.85, 0.85], + [0.85, 0.15, 0.85], + [0.75, 0.75, 0.75], + [0.80, 0.67, 0.00], + [0.30, 0.65, 1.00], + [0.40, 0.07, 0.00], + [1.00, 0.80, 0.87], + [1.00, 0.87, 0.20], + [1.00, 0.80, 0.70], + [0.80, 0.87, 1.00]]; + +def plot_weak_scaling(path_out, fname, df, ylabel, logplot = True, colors=colors): + ''' + Plot different timings or other benchmarks against the number of cores used. + @param path_out Path to output files. 
+ @param fname First part of filename. + @param df Data frame with results per core scaling. + @param ylabel Y-axis label. + ''' + fontsize = 24 + + fig = plt.figure(figsize=(12, 10)) + # fig.subplots_adjust(bottom=0.3) # use for interactive plotting, for savefig, legend+figure is adjusted with bbox_inches='tight' + ax = fig.add_subplot() + start_plot = list(df.columns).index('Total multigrid cycle') + start_optimal_line = df.loc[0,[timer for timer in df.iloc[:,start_plot:].columns if 'Total' in timer]].max() + optimal_line = [start_optimal_line for i in df['Cores'].values] + if logplot: + plt.loglog(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,start_plot:].columns)): + col_label = df.columns[start_plot+i] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.loglog(df['Cores'], df.iloc[:,start_plot+i], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + else: + plt.plot(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,start_plot:].columns)): + col_label = df.columns[start_plot+i] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.plot(df['Cores'], df.iloc[:,start_plot+i], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + + + ax.legend(bbox_to_anchor=(0, 0, 1, -0.1), mode="expand", ncols=2, fontsize=fontsize-6) + # ax.set_title(title, fontsize=fontsize+6) + ax.set_ylabel(ylabel, fontsize=fontsize) + ax.set_xlabel('Number of cores', fontsize=fontsize) + ax.set_xticks(df['Cores']) + 
ax.set_xticklabels(df['Cores']) + + plt.rcParams['xtick.labelsize']=fontsize-6 + plt.rcParams['ytick.labelsize']=fontsize-6 + + path_out = join(path_out, 'figures') + if not exists(path_out): + makedirs(path_out) + plt.savefig(join(path_out, fname), dpi=300, bbox_inches='tight') + # plt.show() + plt.close() + + +def main(problem=7, divideBy2=[4, 5, 6, 7], mod_pk=1): + + file_prefix = 'caro-paper-weakscaling' + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = 'C1toC64_FLOPSDP' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + + path_to_perf_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) + + filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2[0]) + '-' + str(divideBy2[-1]) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + + try: # allow deletion of empty error files + df_weak = pd.read_csv(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.csv'), sep=';') + + plot_weak_scaling(path_to_perf_files, file_prefix + filename_input + file_postfix, df_weak.iloc[:, list(df_weak.columns).index('Cores'):], 'Time (sec)') + except FileNotFoundError: + err = 0 # no error file = no error + +if __name__ == "__main__": + # weak scaling + main(problem=7, divideBy2=[4,5,6,7], mod_pk=2) \ No newline at end of file diff --git a/plot_tools/lskkr23_read_output_strong_scaling.py b/plot_tools/lskkr23_read_output_strong_scaling.py new file mode 100644 index 00000000..92ab41d4 --- /dev/null +++ b/plot_tools/lskkr23_read_output_strong_scaling.py @@ -0,0 +1,225 @@ +import pandas as pd +import numpy as np +import os +import sys + +# This file has been written and used to read the results for +# Leleux, 
Schwarz, Kühn, Kruse, Rüde - Complexity analysis and scalability of a matrix-free extrapolated geometric multigrid (2023) +def main(problem=6, divideBy2=7, mod_pk=1): + + file_prefix = 'caro-paper' + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = '' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + maxCores = 64# 128 + cores_used = [] # will be filled automatically + + path_to_perf_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) + + cols_problem = ['Problem', 'rExp', 'divB2', 'ModPK', 'Extrapolation'] + + cols_cluster = ['Nodes', 'Ranks', 'Cores'] + + cols_convergence = ['its', '2-norm of error', 'inf-norm of error'] + + cols_time = [] + cols_time += ['Total setup', 'Building system matrix A and RHS', 'Factorization of coarse operator Ac', + 'Building intergrid operators (e.g. 
projections)', 'Building smoothing operators A_sc', + 'Factorizing smoothing operators A_sc'] + cols_time += ['Total multigrid cycle', 'Complete smoothing', 'Computing residual', + 'Applying restriction', 'Solve coarse system', 'Applying prolongation (+ coarse grid correction)'] + cols_time += ['Computing residual on finest level', 'Computing final error', 'Total application of A'] + cols_time += ['Evaluation of arr, art, and att', 'Evaluation of alpha and beta', 'Computing determinant of Jacobian of inverse mapping', + 'Computing exact solution'] + cols_time += ['Total execution time'] + + cols = cols_problem + cols_cluster + cols_convergence + cols_time + cols_dtypes = {col: 'int' for col in cols_problem + cols_cluster} + cols_dtypes.update( + {col: 'double' for col in cols_convergence + cols_time}) + + filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) + + err = 0 + regions = {} # Attention: We assume all benchmarks in one file to be run with the same regions (output)! 
+ search_terms_likwid = {} + search_terms_likwid['FLOPS_DP'] = 'DP [MFLOP/s]' + search_terms_likwid['MEM_DP'] = 'Memory bandwidth [MBytes/s]' + benchmarks = [] + + try: # allow deletion of empty error files + with open(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.err')) as f: + line = f.readline() + while line and err == 0: + line = f.readline() + if ' error' in line: + err = 1 + print('Error in job script.\n') + print(line) + except FileNotFoundError: + err = 0 # no error file = no error + + if err == 0: + with open(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.out')) as f: + + lines = f.readlines() + i = 0 + while i < len(lines)-1: + + # search for next program execution + if '--> Iteration' in lines[i]: + while '--> Iteration' in lines[i] and '--> Iteration' in lines[i+1]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + # iteration scheme ended: + # split on empty spaces and take number of iterations in third place + its = int(lines[i].split()[2][:-1]) + while '-norm' not in lines[i]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + norm2 = -1 + if '2-norm' in lines[i]: + norm2 = float(lines[i].split()[-1]) + else: + sys.exit('Error. 2-Norm information not available.') + norminf = -1 + if 'inf-norm' in lines[i+1]: + norminf = float(lines[i+1].split()[-1]) + else: + sys.exit('Error. 
Inf-Norm information not available.') + + while 'Total setup' not in lines[i]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding timing output.') + # global time print out, split line by line of information and add to dataframe + time_dict = dict() + while 'Total execution time' not in lines[i-1]: + timings = lines[i].split(':') # here we split the timers based on empty spaces + # convert timing to floating point + if len(timings) > 1: + time_dict[timings[0].replace('\t', '')] = float(timings[1]) + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + + multi_region_run = True + while multi_region_run: # iterate over potentially multiple LIKWID regions + + while 'Group 1: ' not in lines[i]: # iterate until LIKWID output + i = i+1 + if 'Number of OpenMP threads:' in lines[i] or (i == len(lines)-1): + multi_region_run = False + break + elif i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + if not multi_region_run: + break + benchmark_line = lines[i].split() + if 'Region' in lines[i]: + rg = benchmark_line[1].replace(',', '') + regions[rg] = 0 + else: + multi_region_run = False + benchmark = benchmark_line[-1] + + # save dataframe for previous benchmark (if there was) and create dataframe for current (new) benchmark + if benchmark not in benchmarks: + if len(benchmarks) > 0: + perf_results_df.to_csv( + os.path.join(path_to_perf_files, + file_prefix + filename_input + file_postfix + '_' + benchmarks[-1] + '.csv'), + index=False) + + benchmarks.append(benchmark) + # create empty data frame from column set + perf_results_df = pd.DataFrame(columns=cols) + # set data types for new data frame as defined above + for k, v in cols_dtypes.items(): + perf_results_df[k] = perf_results_df[k].astype(v) + + # number of LIKWID outputs gives number of used cores + cores = str.count(lines[i+2], 'HWThread') + + cores_used.append(cores) + + search_postfix = '' + if cores > 1: + 
search_postfix = ' STAT' + + while all([val + search_postfix not in lines[i] for val in search_terms_likwid.values()]): + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + + # extract benchmark result + regions[rg] = float(lines[i].split('|')[2].split()[0]) + + # with the number of used cores determined, check if + # line for this result is already present + row_setting_avail = ( + perf_results_df[cols_problem[0]] == problem) & ( + perf_results_df[cols_problem[1]] == nr_exp) & ( + perf_results_df[cols_problem[2]] == divideBy2) & ( + perf_results_df[cols_problem[3]] == mod_pk) & ( + perf_results_df[cols_cluster[0]] == nodes) & ( + perf_results_df[cols_cluster[1]] == ranks) & ( + perf_results_df[cols_cluster[2]] == cores) + + if row_setting_avail.values.sum() <= 1: # add new line + # add to all rows fix identifiers for: + # problem, exponent in r, modified coordinates, extrapolation + # number of nodes and ranks of the job + # as well as variable numbers for other columns + benchmark_unit = search_terms_likwid[benchmarks[-1]].split('[')[1][:-1] + perf_results_df.loc[len(perf_results_df.index), + cols_problem + cols_cluster + + cols_convergence + [reg + ' (' + benchmark_unit + ')' for reg in regions.keys()]] = [ + problem, nr_exp, divideBy2, mod_pk, extrapolation, + nodes, ranks, cores, its, norm2, norminf] + list(regions.values()) + # add timings + for time_col, time_val in time_dict.items(): + perf_results_df.loc[len( + perf_results_df.index)-1, time_col] = time_val + elif row_setting_avail.values.sum() > 1: # error + sys.exit( + 'Error. More than two lines corresponds to criterion.' + '\n\tTwo benchmarks for the exact same setting in output. 
Please check output file.') + + i += 1 + # end while + + # save file of last benchmark + perf_results_df.to_csv( + os.path.join(path_to_perf_files, + file_prefix + filename_input + file_postfix + '_' + benchmarks[-1] + '.csv'), + index=False) + # close file + + +if __name__ == "__main__": + # strong scaling + main(problem=6, divideBy2=7, mod_pk=1) + main(problem=6, divideBy2=7, mod_pk=2) + main(problem=7, divideBy2=7, mod_pk=1) + main(problem=7, divideBy2=7, mod_pk=2) + # large case strong scaling + main(problem=7, divideBy2=8, mod_pk=1) \ No newline at end of file diff --git a/plot_tools/plot_benchmarks_scaling.py b/plot_tools/plot_benchmarks_scaling.py deleted file mode 100644 index cafcf8a9..00000000 --- a/plot_tools/plot_benchmarks_scaling.py +++ /dev/null @@ -1,118 +0,0 @@ -import pandas as pd -import numpy as np -import sys -# Use backend to not plot on UI -# import matplotlib -# matplotlib.use('Agg') -import matplotlib.pyplot as plt -from os.path import join, exists, dirname -from os import makedirs - -### Plots scaling of FLOPS and Caches (saturation) scaling from 0 to n Cores -### as read from data frame - -colors = [ - [1.00, 0.49, 0.06], - [0.17, 0.63, 0.18], - [0.83, 0.15, 0.16], - [0.13, 0.47, 0.69], - [0.58, 0.40, 0.74], - [0.53, 0.35, 0.27], - [0.92, 0.46, 0.77], - [0.50, 0.50, 0.50], - [0.66, 0.85, 0.06], - [0.06, 0.85, 0.85], - [0.85, 0.15, 0.85], - [0.75, 0.75, 0.75]]; - -def plot_perf_per_core(path_out, fname, df, benchname, saturation_limit=0, colors=colors): - fontsize = 16 - - fig = plt.figure(figsize=(10, 10)) - ax = fig.add_subplot() - plt.plot(df['Cores'], df[benchname[0]]) - - if benchname[0] == 'CACHES': - plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])), linestyle='dotted', linewidth=3, color=[0, 0, 0]) - ax.text(1, saturation_limit+3, 'Memory bandwith (AXPY) (' + str(saturation_limit) + ' GBytes/s)', fontsize=14) - ax.set_ylim(0, 90) - - - ax.set_title(benchname[1][0], fontsize=fontsize+6) - 
ax.set_ylabel(benchname[1][0], fontsize=fontsize) - - ax.set_xlabel('Number of cores used', fontsize=fontsize) - - - path_out = join(path_out, 'figures') - if not exists(path_out): - makedirs(path_out) - plt.savefig(join(path_out, fname + '_' + benchname[0].lower()), bbox_inches='tight') - plt.close() - - -def main(): - - problem = 5 - nr_exp = 4 - mod_pk = 1 - smoother = 3 - extrapolation = 1 - - nodes = 1 - ranks = 1 - maxCores = 14 - - ## saturation_limit is node specific and needs to be adapted. - saturation_limit = 80 - - fname = 'p' + str(problem) + '-r' + str(nr_exp) + '-mpk' + str(mod_pk) + '-s' + str( - smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) - path_to_files_rel = '' # relative to plot script - path_to_files = join(dirname(__file__), join(path_to_files_rel)) - - df = pd.read_csv( - join(path_to_files, fname + '_benchmarks.csv'), - dtype={'Problem': int, 'rExp': int, 'ModPK': int, - 'Extrapolation': int, 'Nodes': int, 'Ranks': int, - 'Cores': int, 'its': int}) - - # Likwid benchmark columns, more benchmarks are in timings. 
- likwid_benchmarks = {'FLOPS_DP': ['Flop performance in Multi-Threading', 'Flops (GFlops/s)'], 'CACHES': [ - 'Memory bandwidth saturation', 'Memory bandwidth (GBytes/s)']} # benchmark : [plot title, plot y-label] - timing_benchmarks = {'Total_execution_time' : ['Total execution time in Multi-Threading', 'Execution time']} - # Problem setting columns - setting_cols = ['Problem', 'rExp', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks'] - - # check content - for bench in likwid_benchmarks.items(): - bench_rows = np.where(df[bench[0]].isnull()!=True)[0] - if len(bench_rows) > 0: - df_subframe = df.iloc[bench_rows].copy() - - # Check that the different number of threads/cores where only conducted - # on one particular setting of the above columns - if np.max(df_subframe.loc[:,setting_cols].nunique()) > 1: - sys.exit('Error in input data, more than one setting found.') - - # TODO or not TODO: If the same run was done multiple times with different LIKWID benchmarks - # it is not clear which line to take. - # This could be extended to take the weighted sum or minimum of all corresponding lines. - # However, these timings should be identical or in the same region... - # Nonetheless, there could be a nicer table format to store the results ;-) - cores_used = df_subframe['Cores'].unique() - if len(cores_used) != len(df_subframe['Cores']): - sys.exit('Error: Multiple times computed with the same number of threads.') - - plot_perf_per_core(path_to_files, fname, df_subframe, bench, saturation_limit=saturation_limit) - - # Plot particular timings from table for first benchmark. Timings should be similar for FLOPS_DP and CACHES. 
- if bench[0] == list(likwid_benchmarks.keys())[0]: - for timing_benchmark in timing_benchmarks.items(): - plot_perf_per_core(path_to_files, fname, df_subframe, timing_benchmark) - - - -if __name__ == '__main__': - main() - \ No newline at end of file diff --git a/plot_tools/read_output.py b/plot_tools/read_output.py deleted file mode 100644 index 6fbb502a..00000000 --- a/plot_tools/read_output.py +++ /dev/null @@ -1,190 +0,0 @@ -import pandas as pd -import numpy as np -import os -import sys - - -def main(): - - jobid = '1256' # provide correct slurm job id - problem = 5 - nr_exp = 4 - mod_pk = 1 - smoother = 3 - extrapolation = 1 - - nodes = 1 - ranks = 1 - maxCores = 14 - cores_used = [] # will be filled automatically - - path_to_perf_files_rel = '' # relative from read_output call - path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) - - cols_problem = ['Problem', 'rExp', 'ModPK', 'Extrapolation'] - - cols_cluster = ['Nodes', 'Ranks', 'Cores'] - - cols_convergence = ['its', '2-norm of error', 'inf-norm of error'] - - cols_time = [] - cols_time += ['t_setup', 't_build', 't_facto_Ac', - 't_build_P', 't_build_Asc', 't_facto_Asc'] - cols_time += ['t_total_(fine)', 't_smoothing', 't_residual', - 't_restriction', 't_Ac', 't_prolongation', 't_fine_residual', - 't_error'] - cols_time += ['t_applyA'] - cols_time += ['t_coeff', 't_arr_art_att', 't_sol', 't_detDFinv', 't_trafo'] - cols_time += ['Total_execution_time'] - - cols_benchmark = ['FLOPS_DP', 'CACHES'] - - cols = cols_problem + cols_cluster + cols_convergence + cols_time + cols_benchmark - cols_dtypes = {col: 'int' for col in cols_problem + cols_cluster} - cols_dtypes.update( - {col: 'double' for col in cols_convergence + cols_time + cols_benchmark}) - - filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-mpk' + str(mod_pk) + '-s' + str( - smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) - - err = 0 - 
with open(os.path.join(path_to_perf_files, 'slurm-' + str(jobid) + '-' + filename_input + '.err')) as f: - line = f.readline() - while line and err == 0: - line = f.readline() - if ' error' in line: - err = 1 - print('Error in job script.\n') - print(line) - - if err == 0: - - with open(os.path.join(path_to_perf_files, 'slurm-' + str(jobid) + '-' + filename_input + '.out')) as f: - - # create empty data frame from column set - perf_results_df = pd.DataFrame(columns=cols) - # set data types for new data frame as defined above - for k, v in cols_dtypes.items(): - perf_results_df[k] = perf_results_df[k].astype(v) - - lines = f.readlines() - i = 0 - while i < len(lines)-1: - - # search for next program execution - if '--> Iteration' in lines[i]: - while '--> Iteration' in lines[i] and '--> Iteration' in lines[i+1]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - # iteration scheme ended: - # split on empty spaces and take number of iterations in third place - its = int(lines[i].split()[2][:-1]) - while '-norm' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - norm2 = -1 - if '2-norm' in lines[i]: - norm2 = float(lines[i].split()[-1]) - else: - sys.exit('Error. 2-Norm information not available.') - norminf = -1 - if 'inf-norm' in lines[i+1]: - norminf = float(lines[i+1].split()[-1]) - else: - sys.exit('Error. 
Inf-Norm information not available.') - - while 't_setup' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - # global time print out, split line by line of information and add to dataframe - time_dict = dict() - while 'Total_execution_time' not in lines[i-1]: - timings = lines[i].split() - for j in range(0, len(timings), 2): - # remove last char (':') from string and convert timing to floating point - time_dict[timings[j][:-1] - ] = float(timings[j+1].replace(',', '')) - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - - # iterate until LIKWID printout - while 'Group 1: ' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - benchmark = lines[i].split()[-1] - # number of LIKWID outputs gives number of used cores - cores = str.count(lines[i+2], 'HWThread') - - cores_used.append(cores) - - search_term_likwid = '' - if benchmark == 'FLOPS_DP': - search_term_likwid = 'DP [MFLOP/s]' - elif benchmark == 'CACHES': - search_term_likwid = 'Memory bandwidth [MBytes/s]' - else: - sys.exit('Error. 
Benchmark type not supported.') - - if cores > 1: - search_term_likwid = search_term_likwid + ' STAT' - - while search_term_likwid not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - - benchmark_result = float(lines[i].split('|')[2].split()[0]) - - # with the number of used cores determined, check if - # line for this result is already present - row_setting_avail = ( - perf_results_df[cols_problem[0]] == problem) & ( - perf_results_df[cols_problem[1]] == nr_exp) & ( - perf_results_df[cols_problem[2]] == mod_pk) & ( - perf_results_df[cols_cluster[0]] == nodes) & ( - perf_results_df[cols_cluster[1]] == ranks) & ( - perf_results_df[cols_cluster[2]] == cores) - - if row_setting_avail.values.sum() <= 1: # add new line - # add to all rows fix identifiers for: - # problem, exponent in r, modified coordinates, extrapolation - # number of nodes and ranks of the job - # as well as variable numbers for other columns - perf_results_df.loc[len(perf_results_df.index), - cols_problem + cols_cluster + - cols_convergence + [benchmark]] = [ - problem, nr_exp, mod_pk, extrapolation, - nodes, ranks, cores, its, norm2, norminf, - benchmark_result] - # add timings - for time_col, time_val in time_dict.items(): - perf_results_df.loc[len( - perf_results_df.index)-1, time_col] = time_val - elif row_setting_avail.values.sum() > 1: # error - sys.exit( - 'Error. More than two lines corresponds to criterion.' 
- ' We can only have one line for each benchmark: FLOPS_DP and CACHES.') - - i += 1 - # end while - - perf_results_df.to_csv( - os.path.join( - path_to_perf_files, filename_input + '_benchmarks.csv'), - index=False) - # close file - - -if __name__ == "__main__": - main() diff --git a/src/create_grid_polar.cpp b/src/create_grid_polar.cpp index 2d348e76..ed497b8d 100644 --- a/src/create_grid_polar.cpp +++ b/src/create_grid_polar.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2019-2023 The GMGPolar Development Team * * Authors: Philippe Leleux, Christina Schwarz, Martin J. Kühn, Carola Kruse, Ulrich Rüde @@ -141,12 +141,29 @@ void level::build_r() // edge int se; - if (gyro::icntl[Param::alpha_coeff] == 0 || gyro::icntl[Param::alpha_coeff] == 2) { - se = floor(nr * 0.66) - n_elems_refined / 2; - } - else { - se = floor(nr * 0.5) - n_elems_refined / 2; + + // allow refining of the grid at r_jump, the center point of the + // drop of the diffusion coefficient alpha. + double r_jump; + if (gyro::icntl[Param::alpha_coeff] == SONNENDRUCKER) { + // The center of the coefficient jump lies at 0.6888697651782026 + // for backward stability with previous runs and the Matlab code, + // we use 0.66 though. + r_jump = 0.66; + } else if (gyro::icntl[Param::alpha_coeff] == ZONI) { + r_jump = 0.4837; + } else if (gyro::icntl[Param::alpha_coeff] == ZONI_SHIFTED) { + // Choose center point of descent. 
+ // a) - ln(0.5 * (alpha(0) - alpha(Rmax))): + // - ln(0.5 * (np.exp(-np.tanh(-14)) - np.exp(-np.tanh(6)))) = 0.16143743821247852 + // b) r_center = Rmax * (np.arctanh(0.16143743821247852) + 14) / 20 = 0.7081431124450334 Rmax + r_jump = 0.7081; + } else if (gyro::icntl[Param::alpha_coeff] == POISSON) { + r_jump = 0.5; // There is no jump for Poisson so this is an arbitrary choice + } else { + throw std::runtime_error("Unknown alpha coeff"); } + se = floor(nr * r_jump) - n_elems_refined / 2; int ee = se + n_elems_refined; // takeout int st = ceil((double)n_elems_refined / 4.0 + 1) - 1; diff --git a/src/gmgpolar.cpp b/src/gmgpolar.cpp index bde3925b..dea7adc1 100644 --- a/src/gmgpolar.cpp +++ b/src/gmgpolar.cpp @@ -67,7 +67,7 @@ void gmgpolar::reset_timers() t_build_P = 0; t_build_Asc = 0; t_facto_Asc = 0; - t_total = 0; + t_total_mgcycle = 0; t_smoothing = 0; t_residual = 0; t_restriction = 0; diff --git a/src/gyro.cpp b/src/gyro.cpp index 23bfdb57..d70ec556 100644 --- a/src/gyro.cpp +++ b/src/gyro.cpp @@ -362,11 +362,8 @@ std::vector gyro::def_solution_rt(double r_i, std::vector theta, } /* ----- end of gyro::eval_def_solution_vec ----- */ /*************************************************************************** - * Diffusivity and operator - **************************************************************************/ -/************************ - * Single - ************************/ +* Diffusivity and operator +**************************************************************************/ /*! * \brief Diffusivity coefficient * @@ -380,7 +377,7 @@ std::vector gyro::def_solution_rt(double r_i, std::vector theta, * \return the coefficient * */ -double gyro::coeff(double r, int verbose) +double gyro::coeff_alpha(double r, int verbose) { double t; TIC; @@ -398,11 +395,7 @@ double gyro::coeff(double r, int verbose) } /* ----- end of level::coeff ----- */ /*! 
- * \brief Diffusivity coefficient - * - * Computes the diffusivity coefficient depending on the radius r - * - for prob 3 or 5: alpha(r)=(2.0/(2.6+3.14)) * (1.3 + atan((1-1.3*r/Rmax) / 0.09)) - * - else: 1 + * \brief Beta coefficient * * \param r: the r coordinate * \param verbose: verbose level for debug @@ -440,7 +433,7 @@ double gyro::coeff_beta(double r, int verbose) * \return the coefficient * */ -std::vector gyro::coeff(std::vector r, int verbose) +std::vector gyro::coeff_alpha(std::vector r, int verbose) { double t; TIC; @@ -460,11 +453,7 @@ std::vector gyro::coeff(std::vector r, int verbose) } /* ----- end of level::coeff ----- */ /*! - * \brief Diffusivity coefficient - * - * Computes the diffusivity coefficient depending on the radius r - * - for prob 3 or 5: alpha(r)=(2.0/(2.6+3.14)) * (1.3 + atan((1-1.3*r/Rmax) / 0.09)) - * - else: 1 + * \brief Beta coefficient * * \param r: the r coordinate * \param verbose: verbose level for debug @@ -529,6 +518,7 @@ double gyro::detDFinv(double r, double theta, int verbose) } return detDFinv_r; } /* ----- end of level::detDFinv ----- */ + double gyro::arr(double r, double theta, int verbose) { double t; @@ -539,7 +529,7 @@ double gyro::arr(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtt = functions->J_tt(r, theta, kappa_eps, delta_e, Rmax); double arr_r = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_r / fabs(detDFinv_r); @@ -551,6 +541,7 @@ double gyro::arr(double r, double theta, int verbose) } return arr_r; } /* ----- end of level::arr ----- */ + double gyro::art(double r, double theta, int verbose) { double t; @@ -561,7 +552,7 @@ double gyro::art(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, 
verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); @@ -575,6 +566,7 @@ double gyro::art(double r, double theta, int verbose) } return art_r; } /* ----- end of level::art ----- */ + double gyro::att(double r, double theta, int verbose) { double t; @@ -585,7 +577,7 @@ double gyro::att(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); double att_r = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_r / fabs(detDFinv_r); @@ -597,6 +589,7 @@ double gyro::att(double r, double theta, int verbose) } return att_r; } /* ----- end of level::att ----- */ + void gyro::arr_att_art(double r, double theta, double& arr, double& att, double& art, int verbose) { double t; @@ -607,7 +600,7 @@ void gyro::arr_att_art(double r, double theta, double& arr, double& att, double& double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); @@ -741,7 +734,7 @@ std::vector gyro::arr(double r, std::vector theta, std::vector Jtt(ntheta); functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { arr_r[i] = 0.5 * (Jtt[i] * 
Jtt[i] + Jrt[i] * Jrt[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -777,7 +770,7 @@ std::vector gyro::art(double r, std::vector theta, std::vectorJ_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { art_r[i] = -0.25 * (Jtt[i] * Jtr[i] + Jrt[i] * Jrr[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -811,7 +804,7 @@ std::vector gyro::att(double r, std::vector theta, std::vector Jtt(ntheta); functions->J_rr(r, theta, kappa_eps, delta_e, Rmax, Jrr, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { att_r[i] = 0.5 * (Jtr[i] * Jtr[i] + Jrr[i] * Jrr[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -850,7 +843,7 @@ void gyro::arr_att_art(double r, std::vector theta, std::vector functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); for (int i = 0; i < ntheta; i++) { double coeff = coeff_r / fabs(detDFinv_r[i]); arr[i] = 0.5 * (Jtt[i] * Jtt[i] + Jrt[i] * Jrt[i]) * coeff; @@ -888,7 +881,7 @@ void gyro::arr_att_art(std::vector r, double theta, std::vector& std::vector Jrt(size); std::vector Jtr(size); std::vector Jtt(size); - std::vector coeff_r = coeff(r, verbose); + std::vector coeff_r = coeff_alpha(r, verbose); functions->J_rr(r, theta, kappa_eps, delta_e, Rmax, Jrr); functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, 
Jtr); diff --git a/src/main.cpp b/src/main.cpp index 3b1c8b9e..5e18b313 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -53,6 +53,10 @@ */ int main(int argc, char* argv[]) { +#ifdef GMGPOLAR_USE_LIKWID + LIKWID_MARKER_INIT; +#endif + int error = 0; //////////////////////////////////////////////////////////////////////////////// @@ -227,7 +231,7 @@ int main(int argc, char* argv[]) error = 1; } // if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC + std::cout << "Total execution time: " << TOC << "\n---------------------------------------------------------------------------\n"; } @@ -287,9 +291,20 @@ int main(int argc, char* argv[]) TIC; gmgpolar gmg; try { +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("create_grid_polar"); +} +#endif // Create polar grids gmg.create_grid_polar(); - +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("create_grid_polar"); +} +#endif // Solve using multigrid gmg.polar_multigrid(); } @@ -305,7 +320,7 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; std::cout << "\n\n\n\n\n"; @@ -375,7 +390,7 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; } // } } @@ -454,13 +469,17 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; } } } } } } - + +#ifdef GMGPOLAR_USE_LIKWID + LIKWID_MARKER_CLOSE; +#endif + return error; } /* ----- end of main ----- */ diff --git a/src/multigrid_iter.cpp b/src/multigrid_iter.cpp index b33b39a5..4e5b960e 100644 --- a/src/multigrid_iter.cpp +++ b/src/multigrid_iter.cpp @@ -104,6 +104,12 @@ void 
gmgpolar::multigrid_iter() t_fine_residual += TOC; TIC; +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("Iteration"); +} +#endif //! Start the Multigrid-Iteration while (it < gyro::icntl[Param::maxiter] && convergence_criterium > rel_red_conv) { it++; @@ -187,7 +193,13 @@ void gmgpolar::multigrid_iter() } t_fine_residual += TOC; TIC; - } + } +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("Iteration"); +} +#endif if (gyro::icntl[Param::verbose] > 0) { if (it == gyro::icntl[Param::maxiter]) { std::cout << "Multigrid reached maxiter=" << gyro::icntl[Param::maxiter] << "\n"; @@ -295,7 +307,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->multigrid_smoothing0(smoother); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of pre-smoother: " << smoother << " (" << TOC << " s)\n"; } } else { @@ -320,7 +332,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->dep_Asc_ortho[smoother]); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of pre-smoother: " << smoother << " (" << TOC << " s)\n"; } } } // omp single @@ -341,21 +353,25 @@ void gmgpolar::multigrid_cycle_extrapol(int l) } //std::cout << "pre-smoothing done \n"; - if (gyro::icntl[Param::verbose] > 5) - gyro::disp(v_level[l]->u, "u"); - t = t_smoothing_tmp; t_smoothing += TOC; + + if (gyro::icntl[Param::verbose] > 5){ // no timing of large output + gyro::disp(v_level[l]->u, "u"); + } + TIC; //! compute residual (of level l) //even if we have extrapolation, compute just normal residual (extrapolation-restriction follows in the next step) gmgpolar::compute_residual(l, 0); - if (gyro::icntl[Param::verbose] > 5) + t_residual += TOC; + + if (gyro::icntl[Param::verbose] > 5){ // no timing of large output gyro::disp(v_level[l]->res, "res"); + } - t_residual += TOC; TIC; //! 
Restriction of residual (coarsening) @@ -411,9 +427,14 @@ void gmgpolar::multigrid_cycle_extrapol(int l) if (gyro::icntl[Param::matrix_free] == 1) { if (gyro::icntl[Param::optimized] == 0) + { v_level[l + 1]->apply_A0(u_coarse, Au_coarse); - else + }else{ + double start = omp_get_wtime(); v_level[l + 1]->apply_A(u_coarse, Au_coarse); + double end = omp_get_wtime(); + t_applyA += (end - start); + } } else { if (gyro::icntl[Param::openmp] == 1) { @@ -471,12 +492,13 @@ void gmgpolar::multigrid_cycle_extrapol(int l) t_restriction += TOC; TIC; - //! iterative call of multigrid_cycle_extrapol - v_level[l + 1]->u.assign(v_level[l + 1]->m, 0); //zero u in every iteration + //! recursive call of multigrid_cycle_extrapol + v_level[l + 1]->u.assign(v_level[l + 1]->m, 0); // zero u in every iteration std::vector error_coarse; if (l == levels - 2) { -//solve exactly on the coarsest level for the error (A * error = res) (use whole A from coarsest level) -// check for the second coarsest level (levels-2), because we have no smoothing on the coarsest level (and thus no Asc and no Asc_ortho) + // exact solve on the coarsest level for the error (A * error = res) (use whole A from coarsest level) + // check for the second to coarsest level (levels-2), as no smoothing on the coarsest level exists + // (and thus no Asc and no Asc_ortho) #ifdef GMGPOLAR_USE_MUMPS if (gyro::icntl[Param::optimized] == 0) { #endif @@ -492,7 +514,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) } else { multigrid_cycle_extrapol(l + 1); - error_coarse = v_level[l + 1]->u; //the coarse_error on level l is u on level l+1 + error_coarse = v_level[l + 1]->u; // the coarse_error on level l is u on level l+1 TIC; } @@ -588,7 +610,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->multigrid_smoothing0(smoother); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of post-smoother: " << smoother << " (" << TOC << " 
s)\n"; } } else { @@ -613,7 +635,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->dep_Asc_ortho[smoother]); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of post-smoother: " << smoother << " ( " << TOC << " s)\n"; } } } // omp single @@ -626,7 +648,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) TIC; t = t_total_tmp; - t_total += TOC; + t_total_mgcycle += TOC; if (gyro::icntl[Param::verbose] > 5) gyro::disp(v_level[l]->u, "u"); @@ -653,7 +675,6 @@ void gmgpolar::compute_residual(int l, int extrapol) else { double start = omp_get_wtime(); v_level[l]->apply_A(v_level[l]->u, Au); - double end = omp_get_wtime(); t_applyA += (end - start); } @@ -725,10 +746,15 @@ void gmgpolar::compute_residual(int l, int extrapol) //apply A(l+1) to Pu std::vector APu(v_level[l + 1]->m, 0); if (gyro::icntl[Param::matrix_free] == 1) { - if (gyro::icntl[Param::optimized] == 0) + if (gyro::icntl[Param::optimized] == 0) { v_level[l + 1]->apply_A0(Pu, APu); //APu = A(l+1) * Pu - else + } + else { + double start = omp_get_wtime(); v_level[l + 1]->apply_A(Pu, APu); //APu = A(l+1) * Pu + double end = omp_get_wtime(); + t_applyA += (end - start); + } } else { if (gyro::icntl[Param::openmp] == 1) { diff --git a/src/polar_multigrid.cpp b/src/polar_multigrid.cpp index 8c79e02f..ab0f5add 100644 --- a/src/polar_multigrid.cpp +++ b/src/polar_multigrid.cpp @@ -131,10 +131,23 @@ void gmgpolar::polar_multigrid() std::cout << "t_applyA: " << t_applyA << std::endl; } else { - if (gyro::icntl[Param::verbose] > 3) + if (gyro::icntl[Param::verbose] > 3){ std::cout << "Building discretized system, restriction and interpolation operators, and defining splittings...\n"; + } +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("Setup"); +} +#endif prepare_op_levels(); +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("Setup"); +} +#endif int m = v_level[0]->m; 
@@ -171,34 +184,41 @@ void gmgpolar::polar_multigrid() if (gyro::icntl[Param::verbose] > 1) { for (int l = 0; l < levels; l++) { std::cout << "LEVEL " << l << "\n"; - std::cout << "\nt_smoothing: " << v_level[l]->t_smoothing << ", t_f_sc: " << v_level[l]->t_f_sc + std::cout << "\tSmoothing: " << v_level[l]->t_smoothing << ", t_f_sc: " << v_level[l]->t_f_sc << ", t_Asc_ortho: " << v_level[l]->t_Asc_ortho << ", t_Asc: " << v_level[l]->t_Asc << "\n"; - std::cout << "\nt_get_ptr: " << v_level[l]->t_get_ptr + std::cout << "\tt_get_ptr: " << v_level[l]->t_get_ptr << ", t_get_stencil: " << v_level[l]->t_get_stencil << ", t_get_smoother: " << v_level[l]->t_get_smoother - << ", t_get_row: " << v_level[l]->t_get_row << "\n"; + << ", t_get_row: " << v_level[l]->t_get_row; std::cout << "\n"; } } if (gyro::icntl[Param::verbose] > 1) { - std::cout << "\nt_setup: " << t_setup << ", t_build: " << t_build << ", t_facto_Ac: " << t_facto_Ac - << ", t_build_P: " << t_build_P << ", t_build_Asc: " << t_build_Asc - << ", t_facto_Asc: " << t_facto_Asc << "\n"; - std::cout << "t_total_(fine): " << t_total << ", t_smoothing: " << t_smoothing - << ", t_residual: " << t_residual << ", t_restriction: " << t_restriction - << ", t_Ac: " << t_Ac << ", t_prolongation: " << t_prolongation - << ", t_fine_residual: " << t_fine_residual << ", t_error: " << t_error << "\n"; - std::cout << "t_applyA: " << t_applyA << std::endl; + std::cout << "\nTotal setup: " << t_setup << "\n\tBuilding system matrix A and RHS: " << t_build + << "\n\tFactorization of coarse operator Ac: " << t_facto_Ac << "\n\tBuilding intergrid operators (e.g. 
projections): " << t_build_P + << "\n\tBuilding smoothing operators A_sc: " << t_build_Asc << "\n\tFactorizing smoothing operators A_sc: " << t_facto_Asc << "\n"; + std::cout << "Total multigrid cycle: " << t_total_mgcycle << "\n\tComplete smoothing: " << t_smoothing + << "\n\tComputing residual: " << t_residual << "\n\tApplying restriction: " << t_restriction + << "\n\tSolve coarse system: " << t_Ac << "\n\tApplying prolongation (+ coarse grid correction): " << t_prolongation + << "\nComputing residual on finest level: " << t_fine_residual; + if (gyro::icntl[Param::check_error] == 1) { + std::cout << "\nComputing final error: " << t_error; + } + std::cout << "\nTotal application of A: " << t_applyA; + std::cout << "\n"; } if (gyro::icntl[Param::verbose] > 1) { - std::cout << "\nt_coeff: " << gyro::dcntl[Param::t_coeff] - << ", t_arr_art_att: " << gyro::dcntl[Param::t_arr_art_att] - << ", t_sol: " << gyro::dcntl[Param::t_sol] - << ", t_detDFinv: " << gyro::dcntl[Param::t_detDFinv] - << ", t_trafo: " << gyro::dcntl[Param::t_trafo] << "\n"; + std::cout << "\nEvaluation of arr, art, and att: " << gyro::dcntl[Param::t_arr_art_att] + << "\n\tEvaluation of alpha and beta: " << gyro::dcntl[Param::t_coeff] + << "\n\tComputing determinant of Jacobian of inverse mapping: " << gyro::dcntl[Param::t_detDFinv]; + if (gyro::icntl[Param::check_error] == 1) { + std::cout << "\nComputing exact solution: " << gyro::dcntl[Param::t_sol]; + } + + std::cout << "\n"; } } } diff --git a/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp b/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp index 12d4525d..92a37a45 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedCircular::rho_pole(double r, std::vector } /*........................................*/ double CartesianR2GyroZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals 
alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2GyroZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp b/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp index e0069469..8d992420 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedShafranov::rho_pole(double r, std::vector } /*........................................*/ double CartesianR2GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp b/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp index 863db829..b37c88e1 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedTriangular::rho_pole(double r, std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedCircular.cpp b/src/test_cases/CartesianR2ZoniShiftedCircular.cpp index 90b7a50c..7c7c31d9 100644 --- a/src/test_cases/CartesianR2ZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedCircular.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double CartesianR2ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp b/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp index dd21e61f..6af2ef5d 100644 --- a/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double CartesianR2ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp b/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp index 6c78e82e..21f2d195 100644 --- a/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double CartesianR2ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp b/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp index 59c024e1..9e9c4700 100644 --- a/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void CartesianR6GyroZoniShiftedShafranov::rho_pole(double r, std::vector } /*........................................*/ double CartesianR6GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedCircular.cpp b/src/test_cases/CartesianR6ZoniShiftedCircular.cpp index ff90dfd1..0ce60df7 100644 --- a/src/test_cases/CartesianR6ZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double CartesianR6ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp b/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp index 390a3d20..c225079d 100644 --- a/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double CartesianR6ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp b/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp index f6eb3ed4..7a8098e5 100644 --- a/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double CartesianR6ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp b/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp index 858a7f5e..fc0e8a60 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double PolarR6GyroZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp b/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp index a1b72161..f3743745 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp @@ -572,12 +572,12 @@ void PolarR6GyroZoniShiftedCulham::J_yq(double r, std::vector const& the } /*........................................*/ double PolarR6GyroZoniShiftedCulham::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedCulham::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp b/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp index 99d7520e..7975c455 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double PolarR6GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp b/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp index fd0e8603..92ed8b7a 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double PolarR6GyroZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedCircular.cpp b/src/test_cases/PolarR6ZoniShiftedCircular.cpp index 3cbf6e23..969760b1 100644 --- a/src/test_cases/PolarR6ZoniShiftedCircular.cpp +++ b/src/test_cases/PolarR6ZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedCircular::rho_pole(double r, std::vector const& t } /*........................................*/ double PolarR6ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedShafranov.cpp b/src/test_cases/PolarR6ZoniShiftedShafranov.cpp index 473d174d..45be29e6 100644 --- a/src/test_cases/PolarR6ZoniShiftedShafranov.cpp +++ b/src/test_cases/PolarR6ZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedShafranov::rho_pole(double r, std::vector const& } /*........................................*/ double PolarR6ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedTriangular.cpp b/src/test_cases/PolarR6ZoniShiftedTriangular.cpp index 437fc5c7..626d1eb0 100644 --- a/src/test_cases/PolarR6ZoniShiftedTriangular.cpp +++ b/src/test_cases/PolarR6ZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedTriangular::rho_pole(double r, std::vector const& } /*........................................*/ double PolarR6ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/README.md b/src/test_cases/README.md new file mode 100644 index 00000000..6a9d9e60 --- /dev/null +++ b/src/test_cases/README.md @@ -0,0 +1,16 @@ +The following files define the geometry, coefficients of the ordinary differential equation and the manufactured solution. We always solve + +$-\nabla \cdot \left( \alpha \nabla u \right) + \beta u = f\quad \text{ in }\quad \Omega$ + +The naming is defined in `constants.h` and it follows the following rules: + +- The first part, e.g., `PolarR6` or `CartesianR6`, defines the manufactured solution. +- The **potential** second part `Gyro` or empty, defines if the $\beta$ coefficient is nontrivial or zero. +- The following part, e.g., `Sonnendrucker` or `Poisson`, defines the value of the $\alpha$ coefficient. +- The last part, e.g., `Shafranov` or `Triangular`, defines the geometry to be used + +For the particular solutions, see: +- Bourne et al. - Solver comparison for Poisson-like equations on tokamak geometries (2023) https://doi.org/10.1016/j.jcp.2023.112249 +- Kuehn, Kruse, Ruede - Implicitly extrapolated geometric multigrid on disk-like domains for the gyrokinetic Poisson equation from fusion plasma applications (2022) https://doi.org/10.1007/s10915-022-01802-1 + +A great thanks to Emily Bourne for preparing these scripts! 
\ No newline at end of file diff --git a/src/test_cases/RefinedGyroZoniShiftedCulham.cpp b/src/test_cases/RefinedGyroZoniShiftedCulham.cpp index b3d73047..7dccf508 100644 --- a/src/test_cases/RefinedGyroZoniShiftedCulham.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedCulham.cpp @@ -571,12 +571,12 @@ void RefinedGyroZoniShiftedCulham::J_yq(double r, std::vector const& the } /*........................................*/ double RefinedGyroZoniShiftedCulham::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedCulham::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp b/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp index 25e39975..6966f995 100644 --- a/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void RefinedGyroZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double RefinedGyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp b/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp index d5c4dc39..0f2d14cf 100644 --- a/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void RefinedGyroZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double RefinedGyroZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/treat_output.py b/treat_output.py index 8da4d0b8..a2f9e1a3 100644 --- a/treat_output.py +++ b/treat_output.py @@ -99,7 +99,7 @@ latex.write(error_norm_inf) latex.write(" & ") - time = line.find('Total_execution_time:') + time = line.find('Total execution time:') if (time != -1): #print(f'{line}') #execution time is found at: time+22, until the end