diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f946a116 --- /dev/null +++ b/.gitignore @@ -0,0 +1,273 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/.pnp +.pnp.js + +# misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local +*.xls +*.xlsx +*.zip +*.tar.gz +*.csv +*slurm* + +# Created by https://www.gitignore.io/api/c++,node,python +# Edit at https://www.gitignore.io/?templates=c++,node,python + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### Node ### +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# TypeScript v1 declaration files +typings/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env +.env.test + +# parcel-bundler cache 
(https://parceljs.org/) +.cache + +# next.js build output +.next + +# nuxt.js build output +.nuxt + +# rollup.js default build output +dist/ + +# Uncomment the public line if your project uses Gatsby +# https://nextjs.org/blog/next-9-1#public-directory-support +# https://create-react-app.dev/docs/using-the-public-folder/#docsNav +# public + +# Storybook build outputs +.out +.storybook-out + +# vuepress build output +.vuepress/dist + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# Temporary folders +tmp/ +temp/ + +### Python ### +# Pycharm +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions + +# Distribution / packaging +.Python +build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +_skbuild/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Docs +docs/html +docs/xml + +# End of https://www.gitignore.io/api/c++,node,python diff --git a/CMakeLists.txt b/CMakeLists.txt index 13143f65..57ca070b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,8 @@ project(GMGPolar VERSION 1.0.0) option(GMGPOLAR_BUILD_TESTS "Build GMGPolar unit tests." ON) option(GMGPOLAR_USE_MUMPS "Use MUMPS to compute matrix factorizations." OFF) +option(GMGPOLAR_USE_LIKWID "Use LIKWID to measure code (regions)." OFF) + set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -42,6 +44,18 @@ configure_file(${CMAKE_SOURCE_DIR}/include/config_internal.h.in ${CMAKE_SOURCE_D target_include_directories(gmgpolar_simulation PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) target_include_directories(GMGPolar PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/test_cases ) +if(GMGPOLAR_USE_LIKWID) + + find_package(LIKWID REQUIRED) + + target_include_directories(GMGPolar PUBLIC ${LIKWID_INCLUDE_DIRS}) + target_link_libraries(GMGPolar PUBLIC ${LIKWID_LIBRARIES}) + target_compile_definitions(GMGPolar PUBLIC "-DLIKWID_PERFMON") + +endif() + + + if(GMGPOLAR_USE_MUMPS) set(INC_DIRS diff --git a/include/config_internal.h.in b/include/config_internal.h.in index de594a47..3bf582e8 100644 --- a/include/config_internal.h.in +++ b/include/config_internal.h.in @@ -28,5 +28,6 @@ #define GMGPOLAR_CONFIG_INTERNAL_H #cmakedefine GMGPOLAR_USE_MUMPS +#cmakedefine GMGPOLAR_USE_LIKWID #endif // GMGPOLAR_CONFIG_INTERNAL_H diff --git a/include/constants.h b/include/constants.h index 
3588e4aa..8083cce2 100644 --- a/include/constants.h +++ b/include/constants.h @@ -245,7 +245,7 @@ domains with the method of characteristics and spline finite elements. (2019) * * Defines the coefficient * 0: beta = 0 - * 1: beta = (1 / alpha) + * 1: beta: 1/alpha for some cases, different in others (see coeffs in test_cases) */ beta_coeff, /*! \brief Norm for stopping criterion @@ -316,8 +316,8 @@ enum dcontrols rel_red_conv, /*! \brief Timings */ - t_coeff, - t_arr_art_att, + t_arr_art_att, // Evaluation of arr, art, and att + t_coeff, // Evaluation of alpha and beta (subcounter of t_arr_art_att) t_sol, t_detDFinv, t_trafo, diff --git a/include/gmgpolar.h b/include/gmgpolar.h index 9de56530..50e3cc1d 100644 --- a/include/gmgpolar.h +++ b/include/gmgpolar.h @@ -45,13 +45,16 @@ #include "constants.h" #include "level.h" #include "gyro.h" +#ifdef GMGPOLAR_USE_LIKWID +#include <likwid.h> +#endif class gmgpolar { public: -/******************************************************************************* - * Attributes - ******************************************************************************/ + /******************************************************************************* + * Attributes + ******************************************************************************/ /*************************************************************************** * Grid levels **************************************************************************/ @@ -67,25 +70,30 @@ std::vector<double> nrm_inf_res; /* execution times */ + // Setup double t_setup; // prepare_op_levels + // Subcounters of Setup double t_build; // build A and RHS double t_facto_Ac; // factorization of coarse operator double t_build_P; // build coarse nodes, line splitting and P double t_build_Asc; // build Asc and Asc_ortho double t_facto_Asc; // factorization of Asc - double t_total; // multigrid_cycle_extrapol + // Multigrid cycle + double t_total_mgcycle; // multigrid_cycle_extrapol + // Subcounter of Multigrid 
cycle double t_smoothing; // pre- and post-smoothing (including solve Asc) - double t_residual; // factorization of Asc + double t_residual; // computation of the residual double t_restriction; // restriction double t_Ac; // solve coarse operator double t_prolongation; // prolongation and coarse grid correction + double t_fine_residual; // computation of residual on level 0 (init and after iteration) double t_error; // computation of final error - double t_applyA; // apply the operator A, method apply_A + double t_applyA; // apply the operator A, method apply_A - /******************************************************************************* - * Methods - ******************************************************************************/ + /******************************************************************************* + * Methods + ******************************************************************************/ gmgpolar(); ~gmgpolar(); @@ -104,13 +112,13 @@ class gmgpolar double compute_backwarderror(); private: - /******************************************************************************* - * Attributes - ******************************************************************************/ +/******************************************************************************* +* Attributes +******************************************************************************/ - /******************************************************************************* - * Methods - ******************************************************************************/ +/******************************************************************************* +* Methods +******************************************************************************/ }; #endif // GMGPOLAR_HXX diff --git a/include/gyro.h b/include/gyro.h index 1ce578dd..e6e39608 100644 --- a/include/gyro.h +++ b/include/gyro.h @@ -66,8 +66,8 @@ namespace gyro * Attributes 
******************************************************************************/ /*************************************************************************** - * Controls and Informations - **************************************************************************/ +* Controls and Informations +**************************************************************************/ /*! The integer control array, see Controls::icontrols for the * possible values */ @@ -99,16 +99,16 @@ extern std::string f_sol_in, f_sol_out; * Methods ******************************************************************************/ /*************************************************************************** - * Parameters - **************************************************************************/ +* Parameters +**************************************************************************/ void init_params(); void show_params(); void get_geometry_coeffs(geometry_type geom); void select_functions_class(int alpha_coeff, int beta_coeff, int geom, int prob); /*************************************************************************** - * Boundary and solution - **************************************************************************/ +* Boundary and solution +**************************************************************************/ /************************ * Single ************************/ @@ -125,12 +125,12 @@ std::vector def_solution_rt(double r_i, std::vector theta, std:: std::vector cos_theta, int ntheta, int verbose); /*************************************************************************** - * Diffusivity and operator - **************************************************************************/ +* Diffusivity and operator +**************************************************************************/ /************************ * Single ************************/ -double coeff(double r, int verbose); +double coeff_alpha(double r, int verbose); double coeff_beta(double r, int verbose); double 
detDFinv(double r, double theta, int verbose); double arr(double r, double theta, int verbose); @@ -140,7 +140,7 @@ void arr_att_art(double r, double theta, double& arr, double& att, double& art, /************************ * Vector ************************/ -std::vector coeff(std::vector r, int verbose); +std::vector coeff_alpha(std::vector r, int verbose); std::vector coeff_beta(std::vector r, int verbose); std::vector detDFinv(double r, std::vector theta, std::vector sin_theta, std::vector cos_theta, int ntheta, int verbose); @@ -157,8 +157,8 @@ void arr_att_art(std::vector r, double theta, std::vector& arr, std::vector& art, int verbose); /*************************************************************************** - * Polar to cartesian and back - **************************************************************************/ +* Polar to cartesian and back +**************************************************************************/ /************************ * Single ************************/ @@ -207,8 +207,8 @@ void disp(std::vector a, const std::string& s_a) } /*************************************************************************** - * Matrix operations - **************************************************************************/ +* Matrix operations +**************************************************************************/ void sp_dgemv(int trans, int m, int n, double alpha, std::vector row_indices, std::vector col_indices, std::vector vals, int lda, std::vector x, int incx, double beta, std::vector& y, int incy); diff --git a/performance/run_gmgpolar.sh b/performance/run_gmgpolar.sh index f1460d45..04bf56f3 100644 --- a/performance/run_gmgpolar.sh +++ b/performance/run_gmgpolar.sh @@ -30,9 +30,8 @@ fac_ani=3 # a nr_exp=4 # n #changing variables -mod_pk=1 # mod_pk=1: Shafranov geometry -prob=7 # Prob=7: Simulate solution (23) of Bourne et al. -# TODO: which alpha and beta to simulate? Alpha aligned with anisotropy? 
+mod_pk=2 # mod_pk=2: Czarny geometry (mod_pk=1: Shafranov geometry) +prob=6 # prob=6 (prob=7: Simulate solution (23) of Bourne et al.) alpha_coeff=2 beta_coeff=1 @@ -75,10 +74,11 @@ fi echo "#!/bin/bash" > run_gmgpolar_sbatch.sh # create a short name for your job echo "#SBATCH --job-name=gmgpolar" >> run_gmgpolar_sbatch.sh +# Change fixed "dbt7" in file name if divideBy2 below is changed! # stdout file %A=job id -echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh +echo "#SBATCH --output=slurm-%A-p$prob-r$nr_exp-dbt7-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.out" >> run_gmgpolar_sbatch.sh # stderr file -echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh +echo "#SBATCH --error=slurm-%A-p$prob-r$nr_exp-dbt7-mpk$mod_pk-s$smoother-e$extrapolation--N$nodes-R$ranks-maxC$cores.err" >> run_gmgpolar_sbatch.sh echo "#SBATCH -N $nodes" >> run_gmgpolar_sbatch.sh echo "#SBATCH -n $ranks" >> run_gmgpolar_sbatch.sh @@ -87,7 +87,8 @@ echo "#SBATCH -c $cores" >> run_gmgpolar_sbatch.sh echo "#SBATCH --threads-per-core=1" >> run_gmgpolar_sbatch.sh # fix CPU frequency to 1.8 Mhz echo "#SBATCH --cpu-freq=1800000" >> run_gmgpolar_sbatch.sh -echo "#SBATCH -t 600" >> run_gmgpolar_sbatch.sh +## Estimation on 2x AMD EPYC 7702: nr=4, dbt=7, prob=7, mpk=1 takes 720 minutes for 1->2->...->128 cores ## +echo "#SBATCH -t 1600" >> run_gmgpolar_sbatch.sh echo "#SBATCH --exclusive" >> run_gmgpolar_sbatch.sh # remove potentially loaded and conflicting modules @@ -103,9 +104,12 @@ echo "module load likwid/5.2.2" >> run_gmgpolar_sbatch.sh # echo "cd .." 
>> run_gmgpolar_sbatch.sh # echo "make -j16" >> run_gmgpolar_sbatch.sh +echo "# potentially run benchmark on machine" >> run_gmgpolar_sbatch.sh +echo "# srun --cpus-per-task=$((cores)) likwid-bench -i 1000 -t stream_avx_fma -w S0:500MB:64" >> run_gmgpolar_sbatch.sh + # to be defined for use case (3/4/5/6) # Attention: divideBy is used as a dummy variable to access folders as grids are read in -echo "let divideBy2=4" >> run_gmgpolar_sbatch.sh +echo "let divideBy2=7" >> run_gmgpolar_sbatch.sh #################################### ## solve system ## @@ -117,7 +121,11 @@ echo "let m=1" >> run_gmgpolar_sbatch.sh # FLOPS-DP counter from 1 to cores many threads echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh -echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh + +echo "# for testing that pin works correctly, potentially use likwid-pin beforehand" >> run_gmgpolar_sbatch.sh +echo "# srun --cpus-per-task=\$m likwid-pin -c N:0-\$mminus1 ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r 
"radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh + +echo "srun --cpus-per-task=\$m likwid-perfctr -f -m -C 0-\$mminus1 -g FLOPS_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh echo "let m=m*2" >> run_gmgpolar_sbatch.sh echo "done;" >> run_gmgpolar_sbatch.sh @@ -125,7 +133,7 @@ echo "done;" >> run_gmgpolar_sbatch.sh echo "let m=1" >> run_gmgpolar_sbatch.sh echo "while [ \$m -le $max_threads ]; do" >> run_gmgpolar_sbatch.sh echo "let mminus1=m-1" >> run_gmgpolar_sbatch.sh -echo "srun --cpus-per-task=\$m likwid-perfctr -f -C 0-\$mminus1 -g CACHE ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior --divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh +echo "srun --cpus-per-task=\$m likwid-perfctr -f -m -C 0-\$mminus1 -g MEM_DP ./build/gmgpolar_simulation --openmp \$m --matrix_free 1 -n $nr_exp -a $fac_ani --mod_pk $mod_pk --DirBC_Interior $DirBC_Interior 
--divideBy2 0 -r $R0 --smoother $smoother -E $extrapolation --verbose 2 --debug $debug --optimized 1 --v1 $v1 --v2 $v2 -R $R --prob $prob --maxiter $maxiter --alpha_coeff $alpha_coeff --beta_coeff $beta_coeff --res_norm $res_norm --f_grid_r "radii_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --f_grid_theta "angles_files/Rmax"$R"/aniso"$fac_ani"/divide"\$divideBy2".txt" --rel_red_conv $rel_red_conv" >> run_gmgpolar_sbatch.sh echo "let m=m*2" >> run_gmgpolar_sbatch.sh echo "done;" >> run_gmgpolar_sbatch.sh diff --git a/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py b/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py new file mode 100644 index 00000000..84be55b6 --- /dev/null +++ b/plot_tools/lskkr23_plot_benchmarks_strong_scaling.py @@ -0,0 +1,188 @@ +import pandas as pd +import numpy as np +import sys +# Use backend to not plot on UI +# import matplotlib +# matplotlib.use('Agg') +import matplotlib.pyplot as plt +from os.path import join, exists, dirname +from os import makedirs +import os.path + +### Plots scaling of FLOPS and Caches (saturation) scaling from 0 to n Cores +### as read from data frame + +colors = [ + [1.00, 0.49, 0.06], + [0.17, 0.63, 0.18], + [0.83, 0.15, 0.16], + [0.13, 0.47, 0.69], + [0.58, 0.40, 0.74], + [0.53, 0.35, 0.27], + [0.92, 0.46, 0.77], + [0.50, 0.50, 0.50], + [0.66, 0.85, 0.06], + [0.06, 0.85, 0.85], + [0.85, 0.15, 0.85], + [0.75, 0.75, 0.75], + [0.80, 0.67, 0.00], + [0.30, 0.65, 1.00], + [0.40, 0.07, 0.00], + [1.00, 0.80, 0.87], + [1.00, 0.87, 0.20], + [1.00, 0.80, 0.70], + [0.80, 0.87, 1.00]]; + +def plot_scaling(path_out, fname, benchname, df, title, ylabel, saturation_limit=0, logplot = True, colors=colors): + ''' + Plot different timings or other benchmarks against the number of cores used. + @param path_out Path to output files. + @param fname First part of filename. + @param benchname Benchmark name and postfix for filename. + @param df Data frame with results per core scaling. + @param title Plot title. 
+ @param ylabel Y-axis label. + @param saturation_limit Saturation limit to be plotted for MEM_DP scaling. + ''' + fontsize = 24 + + fig = plt.figure(figsize=(12, 10)) + # fig.subplots_adjust(bottom=0.3) # use for interactive plotting, for savefig, legend+figure is adjusted with bbox_inches='tight' + ax = fig.add_subplot() + start_optimal_line = df.loc[0,[timer for timer in df.iloc[:,1:].columns if 'Total' in timer]].max() + optimal_line = 1.0/df['Cores'].values * start_optimal_line + if logplot: + if benchname == 'Timings': + plt.loglog(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,1:].columns)): + col_label = df.columns[i+1] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.loglog(df['Cores'], df.iloc[:,i+1], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + else: + if benchname == 'Timings': + plt.plot(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,1:].columns)): + col_label = df.columns[i+1] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.plot(df['Cores'], df.iloc[:,i+1], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + + if benchname == 'MEM_DP': + if saturation_limit > 0: + plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])), linestyle='dotted', linewidth=3, color=[0, 0, 0]) + ax.text(1, saturation_limit + 3, 'Memory bandwith (AXPY) (' + str(saturation_limit) + ' GBytes/s)', fontsize=14) + ax.set_ylim(0, saturation_limit + 10) + + ax.legend(bbox_to_anchor=(0, 0, 1, -0.1), 
mode="expand", ncols=2, fontsize=fontsize-6) + # ax.set_title(title, fontsize=fontsize+6) + ax.set_ylabel(ylabel, fontsize=fontsize) + ax.set_xlabel('Number of cores', fontsize=fontsize) + ax.set_xticks(df['Cores']) + ax.set_xticklabels(df['Cores']) + + plt.rcParams['xtick.labelsize']=fontsize-6 + plt.rcParams['ytick.labelsize']=fontsize-6 + + path_out = join(path_out, 'figures') + if not exists(path_out): + makedirs(path_out) + plt.savefig(join(path_out, fname + '_' + benchname.lower()), dpi=300, bbox_inches='tight') + # plt.show() + plt.close() + + +def main(benchmarks=['FLOPS_DP'], problem=6, divideBy2=7, mod_pk=1, maxCores=128): + # maxCores simulated in scaling + + file_prefix = 'caro-paper' # provide correct slurm job id + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = '' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + + maxCoresPlot = 64 # maxCores to plot + plot_counter = {} # dict on which counter to plot + plot_counter['Total setup'] = 0 + plot_counter['Building system matrix A and RHS'] = 0 + plot_counter['Factorization of coarse operator Ac'] = 0 + plot_counter['Building intergrid operators (e.g. 
projections)'] = 0 + plot_counter['Building smoothing operators A_sc'] = 0 + plot_counter['Factorizing smoothing operators A_sc'] = 0 + plot_counter['Total multigrid cycle'] = 1 + plot_counter['Complete smoothing'] = 1 + plot_counter['Computing residual'] = 1 + plot_counter['Applying restriction'] = 1 + plot_counter['Solve coarse system'] = 1 + plot_counter['Applying prolongation (+ coarse grid correction)'] = 1 + plot_counter['Computing residual on finest level'] = 1 + plot_counter['Computing final error'] = 0 + plot_counter['Total application of A'] = 1 + plot_counter['Evaluation of arr, art, and att'] = 0 + plot_counter['Evaluation of alpha and beta'] = 0 + plot_counter['Computing determinant of Jacobian of inverse mapping'] = 0 + plot_counter['Computing exact solution'] = 0 + plot_counter['Total execution time'] = 0 + + plot_regions = {} # dict on which likwid region to plot + plot_regions['Setup'] = 1 + plot_regions['Iteration'] = 1 + + bench_to_unit = {} + bench_to_unit['FLOPS_DP'] = 'MFLOP/s' + bench_to_unit['MEM_DP'] = 'MBytes/s' + + ## saturation_limit for MEM_DP scaling (node specific and needs to be adapted). 
+ saturation_limit = 80 + + fname = file_prefix + 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) + file_postfix + + path_to_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_files_rel)) + + + # Problem setting columns + setting_cols = ['Problem', 'rExp', 'divB2', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks'] + + + df = [] + for bench in benchmarks: + df.append(pd.read_csv( + join(path_to_files, fname + '_' + bench + '.csv'), + dtype={'Problem': int, 'rExp': int, 'divB2': int, 'ModPK': int, + 'Extrapolation': int, 'Nodes': int, 'Ranks': int, + 'Cores': int, 'its': int})) + + # Check that the different number of threads/cores where only conducted + # on one particular setting of the above columns + if np.max(df[0].loc[:,setting_cols].nunique()) > 1: + sys.exit('Error in input data, more than one setting found.') + # check that only one row per number of cores is available + if df[0]['Cores'].nunique() != len(df[0]['Cores']): + sys.exit('Error: Multiple times computed with the same number of cores.') + + # check content + for i in range(len(df)): + # plot timings + plot_scaling(path_to_files, fname, 'Timings', df[i].loc[df[i]['Cores']<=maxCoresPlot, ['Cores'] + [k for k, v in plot_counter.items() if v==1]], 'Strong scaling of GMGPolar', 'Time (sec)') + # plot likwid benchmark + plot_scaling(path_to_files, fname, benchmarks[i], df[i].loc[df[i]['Cores']<=maxCoresPlot, ['Cores'] + [col for col in df[i].columns if bench_to_unit[benchmarks[i]] in col]], 'Strong scaling of GMGPolar', bench_to_unit[benchmarks[i]]) + + +if __name__ == '__main__': + # strong scaling + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=6, divideBy2=7, mod_pk=1) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], 
problem=6, divideBy2=7, mod_pk=2) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=7, divideBy2=7, mod_pk=1) + main(benchmarks=['FLOPS_DP', 'MEM_DP'], problem=7, divideBy2=7, mod_pk=2) + # large case strong scaling + main(benchmarks=['FLOPS_DP'], problem=7, divideBy2=8, mod_pk=1, maxCores=64) + \ No newline at end of file diff --git a/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb b/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb new file mode 100644 index 00000000..f3fcf599 --- /dev/null +++ b/plot_tools/lskkr23_plot_sol_coeff_compute_flops.ipynb @@ -0,0 +1,277 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from matplotlib.colors import LogNorm\n", + "import matplotlib.ticker as ticker\n", + "\n", + "# Define the curvilinear mapping\n", + "def xymap(r, t, mod_pk=1, par1=0.3, par2=0.2, x0=0, y0=0):\n", + " if mod_pk == 0:\n", + " x = r*np.cos(t)\n", + " y = r*np.sin(t)\n", + " elif mod_pk == 1:\n", + " kappa=par1\n", + " delta=par2\n", + " x = x0 + (1-kappa)*r*np.cos(t)-delta*r*r\n", + " y = y0 + (1+kappa)*r*np.sin(t)\n", + " elif mod_pk == 2:\n", + " epsilon=par1\n", + " lambdaa=par2\n", + " sq = np.sqrt(1+epsilon*(epsilon+2*r*np.cos(t)))\n", + " ksi = 1/np.sqrt(1-epsilon*epsilon/4)\n", + " x = (1/epsilon)*(1-sq)\n", + " y = y0 + lambdaa*ksi*r*np.sin(t)/(2-sq)\n", + " return x, y\n", + "\n", + "# Define the 2D function\n", + "def func(r, t, x, y, prob=6, mod_pk=1, C=1, Rmax=1, m=1):\n", + " if prob==6:\n", + " f=C*np.power(r/Rmax, 6)*np.power(r/Rmax-1, 6)*np.cos(m*t)\n", + " elif prob==7:\n", + " f=C*np.power(1+r/Rmax, 6)*np.power(1-r/Rmax, 6)*np.cos(2*np.pi*x)*np.sin(2*np.pi*y)\n", + " return f\n", + "\n", + "# Format numbers in scientific format\n", + "def fmt(x, pos):\n", + " # a, b = '{:.2e}'.format(x).split('e')\n", + " a, b = '{:.0e}'.format(x).split('e')\n", + " b = int(b)\n", + " return r'${} \\times 
10^{{{}}}$'.format(a, b)\n", + "\n", + "PROB=6\n", + "MOD_PK=1\n", + "C = 2**12 * 10**(-4)\n", + "M = 11\n", + "RMIN = 1e-5\n", + "RMAX = 1.3\n", + "KAPPA=0.3\n", + "DELTA=0.2\n", + "EPSILON=0.3\n", + "LAMBDA=1.4\n", + "\n", + "fontsize=18\n", + "\n", + "# Generate x and y values\n", + "N_INT = 1000\n", + "r = np.linspace(RMIN, RMAX, N_INT)\n", + "t = np.linspace(0, 2*np.pi, N_INT)\n", + "\n", + "# Create a meshgrid from x and y\n", + "R, T = np.meshgrid(r, t)\n", + "\n", + "for PROB in [6, 7]:\n", + " print(\"Prob:\",PROB)\n", + " for MOD_PK in [0,1,2]:\n", + " print(\"Geometry:\",MOD_PK)\n", + " if MOD_PK == 1:\n", + " par1=KAPPA\n", + " par2=DELTA\n", + " elif MOD_PK == 2:\n", + " par1=EPSILON\n", + " par2=LAMBDA\n", + " else:\n", + " par1=0\n", + " par2=0\n", + " \n", + " # Evaluate the function values for each point in the meshgrid\n", + " X, Y = xymap(R, T, mod_pk=MOD_PK, par1=par1, par2=par2)\n", + " Z = func(R, T, X, Y, prob=PROB, mod_pk=MOD_PK, C=C, Rmax=RMAX, m=M)\n", + " # print(\"X:\",np.min(X),np.max(X))\n", + " # print(\"Y:\",np.min(Y),np.max(Y))\n", + " # print(\"Z:\",np.min(Z),np.max(Z))\n", + "\n", + " # Plot the 2D function with logarithmic color scale\n", + " plt.contourf(X, Y, Z, cmap='coolwarm',levels=np.linspace(Z.min(),Z.max(),100))\n", + "\n", + " # Set limits on the x and y axes\n", + " if MOD_PK == 0:\n", + " plt.xlim(-1.4, 1.4)\n", + " plt.ylim(-1.4, 1.4)\n", + " elif MOD_PK == 1:\n", + " plt.xlim(-1.4, 0.7)\n", + " plt.ylim(-1.8, 1.8)\n", + " elif MOD_PK == 2:\n", + " plt.xlim(-1.4, 1.6)\n", + " plt.ylim(-2.2, 2.2)\n", + "\n", + " # Add colorbar\n", + " cbar = plt.colorbar() #format=ticker.FuncFormatter(fmt))\n", + " # format=ticker.ScalarFormatter()\n", + " # cbar.set_label('Solution')\n", + "\n", + " # Set equal aspect ratio for the plot\n", + " plt.gca().set_aspect('equal', adjustable='box')\n", + "\n", + " # Add labels and title\n", + " # plt.xlabel('X-axis')\n", + " # plt.ylabel('Y-axis')\n", + " # plt.title('2D Function with 
Logarithmic Color Scale')\n", + " \n", + " #plt.grid()\n", + " plt.rcParams['xtick.labelsize']=fontsize-4\n", + " plt.rcParams['ytick.labelsize']=fontsize-4 \n", + " \n", + " # Save figure\n", + " plt.savefig('sol_prob-'+str(PROB)+'_geom-'+str(MOD_PK)+'.png', bbox_inches='tight', dpi=300)\n", + "\n", + " # Show the plot\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Define the 1D functions\n", + "def alpha(r, Rmax=1.3, rp=0.7, deltar=0.05):\n", + " return np.exp(-np.tanh(((r/Rmax)-rp)/deltar))\n", + "\n", + "def beta(r, Rmax=1.3, rp=0.7, deltar=0.05):\n", + " return 1/alpha(r, Rmax=Rmax, rp=rp, deltar=deltar)\n", + "\n", + "RMIN=1e-5\n", + "RMAX=1.3\n", + "RP=0.7\n", + "DELTAR=0.05\n", + "\n", + "fontsize=18\n", + "\n", + "# Generate x values\n", + "r = np.linspace(RMIN, RMAX, 100)\n", + "\n", + "# Evaluate the function values\n", + "alpha_r = alpha(r, Rmax=RMAX, rp=RP, deltar=DELTAR)\n", + "beta_r = beta(r, Rmax=RMAX, rp=RP, deltar=DELTAR)\n", + "print(\"ALPHA:\",np.min(alpha_r),np.max(alpha_r))\n", + "print(\"BETA:\",np.min(beta_r),np.max(beta_r))\n", + "\n", + "# Plot the functions on the same plot\n", + "plt.plot(r, alpha_r, label='α(r)', linewidth=3)\n", + "plt.plot(r, beta_r, label='β(r)', linewidth=3)\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('r', fontsize=fontsize)\n", + "\n", + "# Add a legend\n", + "plt.legend(fontsize=fontsize)\n", + " \n", + "#plt.grid()\n", + "plt.rcParams['xtick.labelsize']=fontsize-4\n", + "plt.rcParams['ytick.labelsize']=fontsize-4\n", + "\n", + "# Save figure\n", + "plt.savefig('coeff.png', bbox_inches='tight', dpi=300)\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def flops_comp(nr, nt, m_c, it, mod_pk):\n", + " m = nr*nt\n", + " mu_c = 
m_c/m\n", + " # print(\"m\",m,\"m_c\",m_c,\"mu_c\",mu_c)\n", + " v1=1\n", + " v2=1\n", + " \n", + " # GLOBAL\n", + " if mod_pk == 0:\n", + " flops_DF = 6\n", + " elif mod_pk == 1:\n", + " flops_DF = 18\n", + " elif mod_pk == 2:\n", + " flops_DF = 112\n", + "\n", + " # SETUP\n", + " ## Flops\n", + " flops_setup_smoother_m = 2*flops_DF + 70 + 7*mu_c\n", + " flops_setup_m = 4/3*flops_setup_smoother_m\n", + " # print(\"flops_setup_m\",flops_setup_m)\n", + " \n", + " flops_setup = flops_setup_m * m\n", + " ## Mem\n", + " mem_setup_m = 3 + 2*mu_c\n", + " # print(\"mem_setup_m\",mem_setup_m)\n", + " \n", + " mem_setup = mem_setup_m * m\n", + " \n", + " # MG\n", + " ## Flops\n", + " flops_smoother_m = 2*flops_DF + 80 + 4*mu_c\n", + " flops_residual_m = flops_DF + 107.25\n", + " flops_m = (4*it)*((v1+v2)*flops_smoother_m + flops_residual_m)/3\n", + " # print(\"flops_m\",flops_m)\n", + " \n", + " flops = flops_m * m\n", + " ## Mem\n", + " mem_smoother_m = 2\n", + " mem_residual_m = 3\n", + " mem_m = 4*(mem_smoother_m + mem_residual_m)/3\n", + " # print(\"mem_m\",mem_m)\n", + " \n", + " mem = mem_m * m\n", + " \n", + " return flops_setup, mem_setup, flops, mem\n", + " \n", + " \n", + "MOD_PK=1\n", + "nr = [769, 1537, 3073, 6145]\n", + "nt = [1024, 2048, 4096, 8192]\n", + "m_c = [161, 321, 641, 1281]\n", + "it = [[70, 67, 64, 61], [56, 54, 51, 49]] # MOD_PK 1, MOD_PK 2\n", + "for MOD_PK in [1, 2]:\n", + " print(\"MOD_PK: \",MOD_PK)\n", + " print(\"nr,nt,m,m_c,it,flops_setup,mem_setup,flops,mem\")\n", + " for i in range(len(nr)):\n", + " flops_setup, mem_setup, flops, mem = flops_comp(nr[i], nt[i], m_c[i], it[MOD_PK-1][i], MOD_PK)\n", + " print(nr[i],\",\",nt[i],\",\",\"{:.2e}\".format(nr[i]*nt[i]),\",\",m_c[i],\",\",it[MOD_PK-1][i],\",\",\"{:.2e}\".format(flops_setup),\",\",\"{:.2e}\".format(mem_setup),\",\",\"{:.2e}\".format(flops),\",\",\"{:.2e}\".format(mem))\n", + " print(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/plot_tools/lskkr23_plot_weak_scaling.py b/plot_tools/lskkr23_plot_weak_scaling.py new file mode 100644 index 00000000..6c463aa7 --- /dev/null +++ b/plot_tools/lskkr23_plot_weak_scaling.py @@ -0,0 +1,119 @@ +import pandas as pd +import numpy as np +import sys +# Use backend to not plot on UI +# import matplotlib +# matplotlib.use('Agg') +import matplotlib.pyplot as plt +from os.path import join, exists, dirname +from os import makedirs +import os.path + + +# This file has been written and used to plot the read weak scaling results for +# Leleux, Schwarz, Kühn, Kruse, Rüde - Complexity analysis and scalability of a matrix-free extrapolated geometric multigrid (2023) +# In contrast to the other files with prefix lskkr23_*, it is written quick and dirty and only +# for weak scaling data collected from different strong scaling experiments. + +colors = [ + [1.00, 0.49, 0.06], + [0.17, 0.63, 0.18], + [0.83, 0.15, 0.16], + [0.13, 0.47, 0.69], + [0.58, 0.40, 0.74], + [0.53, 0.35, 0.27], + [0.92, 0.46, 0.77], + [0.50, 0.50, 0.50], + [0.66, 0.85, 0.06], + [0.06, 0.85, 0.85], + [0.85, 0.15, 0.85], + [0.75, 0.75, 0.75], + [0.80, 0.67, 0.00], + [0.30, 0.65, 1.00], + [0.40, 0.07, 0.00], + [1.00, 0.80, 0.87], + [1.00, 0.87, 0.20], + [1.00, 0.80, 0.70], + [0.80, 0.87, 1.00]]; + +def plot_weak_scaling(path_out, fname, df, ylabel, logplot = True, colors=colors): + ''' + Plot different timings or other benchmarks against the number of cores used. + @param path_out Path to output files. 
+ @param fname First part of filename. + @param df Data frame with results per core scaling. + @param ylabel Y-axis label. + ''' + fontsize = 24 + + fig = plt.figure(figsize=(12, 10)) + # fig.subplots_adjust(bottom=0.3) # use for interactive plotting, for savefig, legend+figure is adjusted with bbox_inches='tight' + ax = fig.add_subplot() + start_plot = list(df.columns).index('Total multigrid cycle') + start_optimal_line = df.loc[0,[timer for timer in df.iloc[:,start_plot:].columns if 'Total' in timer]].max() + optimal_line = [start_optimal_line for i in df['Cores'].values] + if logplot: + plt.loglog(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,start_plot:].columns)): + col_label = df.columns[start_plot+i] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.loglog(df['Cores'], df.iloc[:,start_plot+i], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + else: + plt.plot(df['Cores'], optimal_line, linewidth=2, linestyle='dashed', color='black', label='Optimal') + for i in range(len(df.iloc[:,start_plot:].columns)): + col_label = df.columns[start_plot+i] + col_label = col_label.replace('Computing residual on finest level', 'Computing residual (finest)') + col_label = col_label.replace('Applying prolongation (+ coarse grid correction)', 'Applying prolongation (+ CGC)') + plt.plot(df['Cores'], df.iloc[:,start_plot+i], linewidth=2, label=col_label, color=colors[i]) # Cores is assumed to be in first column + + + ax.legend(bbox_to_anchor=(0, 0, 1, -0.1), mode="expand", ncols=2, fontsize=fontsize-6) + # ax.set_title(title, fontsize=fontsize+6) + ax.set_ylabel(ylabel, fontsize=fontsize) + ax.set_xlabel('Number of cores', fontsize=fontsize) + ax.set_xticks(df['Cores']) + 
ax.set_xticklabels(df['Cores']) + + plt.rcParams['xtick.labelsize']=fontsize-6 + plt.rcParams['ytick.labelsize']=fontsize-6 + + path_out = join(path_out, 'figures') + if not exists(path_out): + makedirs(path_out) + plt.savefig(join(path_out, fname), dpi=300, bbox_inches='tight') + # plt.show() + plt.close() + + +def main(problem=7, divideBy2=[4, 5, 6, 7], mod_pk=1): + + file_prefix = 'caro-paper-weakscaling' + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = 'C1toC64_FLOPSDP' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + + path_to_perf_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) + + filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2[0]) + '-' + str(divideBy2[-1]) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + + try: # allow deletion of empty error files + df_weak = pd.read_csv(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.csv'), sep=';') + + plot_weak_scaling(path_to_perf_files, file_prefix + filename_input + file_postfix, df_weak.iloc[:, list(df_weak.columns).index('Cores'):], 'Time (sec)') + except FileNotFoundError: + err = 0 # no error file = no error + +if __name__ == "__main__": + # weak scaling + main(problem=7, divideBy2=[4,5,6,7], mod_pk=2) \ No newline at end of file diff --git a/plot_tools/lskkr23_read_output_strong_scaling.py b/plot_tools/lskkr23_read_output_strong_scaling.py new file mode 100644 index 00000000..92ab41d4 --- /dev/null +++ b/plot_tools/lskkr23_read_output_strong_scaling.py @@ -0,0 +1,225 @@ +import pandas as pd +import numpy as np +import os +import sys + +# This file has been written and used to read the results for +# Leleux, 
Schwarz, Kühn, Kruse, Rüde - Complexity analysis and scalability of a matrix-free extrapolated geometric multigrid (2023) +def main(problem=6, divideBy2=7, mod_pk=1): + + file_prefix = 'caro-paper' + if file_prefix != '': + file_prefix = file_prefix + '-' + file_postfix = '' + if file_postfix != '': + file_postfix = '-' + file_postfix + nr_exp = 4 + smoother = 3 + extrapolation = 1 + + nodes = 1 + ranks = 1 + maxCores = 64# 128 + cores_used = [] # will be filled automatically + + path_to_perf_files_rel = os.path.join('..', os.path.join('..', 'scaling_output')) # relative from read_output call + path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) + + cols_problem = ['Problem', 'rExp', 'divB2', 'ModPK', 'Extrapolation'] + + cols_cluster = ['Nodes', 'Ranks', 'Cores'] + + cols_convergence = ['its', '2-norm of error', 'inf-norm of error'] + + cols_time = [] + cols_time += ['Total setup', 'Building system matrix A and RHS', 'Factorization of coarse operator Ac', + 'Building intergrid operators (e.g. 
projections)', 'Building smoothing operators A_sc', + 'Factorizing smoothing operators A_sc'] + cols_time += ['Total multigrid cycle', 'Complete smoothing', 'Computing residual', + 'Applying restriction', 'Solve coarse system', 'Applying prolongation (+ coarse grid correction)'] + cols_time += ['Computing residual on finest level', 'Computing final error', 'Total application of A'] + cols_time += ['Evaluation of arr, art, and att', 'Evaluation of alpha and beta', 'Computing determinant of Jacobian of inverse mapping', + 'Computing exact solution'] + cols_time += ['Total execution time'] + + cols = cols_problem + cols_cluster + cols_convergence + cols_time + cols_dtypes = {col: 'int' for col in cols_problem + cols_cluster} + cols_dtypes.update( + {col: 'double' for col in cols_convergence + cols_time}) + + filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-dbt' + str(divideBy2) + '-mpk' + str(mod_pk) + '-s' + str( + smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) + + err = 0 + regions = {} # Attention: We assume all benchmarks in one file to be run with the same regions (output)! 
+ search_terms_likwid = {} + search_terms_likwid['FLOPS_DP'] = 'DP [MFLOP/s]' + search_terms_likwid['MEM_DP'] = 'Memory bandwidth [MBytes/s]' + benchmarks = [] + + try: # allow deletion of empty error files + with open(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.err')) as f: + line = f.readline() + while line and err == 0: + line = f.readline() + if ' error' in line: + err = 1 + print('Error in job script.\n') + print(line) + except FileNotFoundError: + err = 0 # no error file = no error + + if err == 0: + with open(os.path.join(path_to_perf_files, file_prefix + filename_input + file_postfix + '.out')) as f: + + lines = f.readlines() + i = 0 + while i < len(lines)-1: + + # search for next program execution + if '--> Iteration' in lines[i]: + while '--> Iteration' in lines[i] and '--> Iteration' in lines[i+1]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + # iteration scheme ended: + # split on empty spaces and take number of iterations in third place + its = int(lines[i].split()[2][:-1]) + while '-norm' not in lines[i]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + norm2 = -1 + if '2-norm' in lines[i]: + norm2 = float(lines[i].split()[-1]) + else: + sys.exit('Error. 2-Norm information not available.') + norminf = -1 + if 'inf-norm' in lines[i+1]: + norminf = float(lines[i+1].split()[-1]) + else: + sys.exit('Error. 
Inf-Norm information not available.') + + while 'Total setup' not in lines[i]: + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding timing output.') + # global time print out, split line by line of information and add to dataframe + time_dict = dict() + while 'Total execution time' not in lines[i-1]: + timings = lines[i].split(':') # here we split the timers based on empty spaces + # convert timing to floating point + if len(timings) > 1: + time_dict[timings[0].replace('\t', '')] = float(timings[1]) + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + + multi_region_run = True + while multi_region_run: # iterate over potentially multiple LIKWID regions + + while 'Group 1: ' not in lines[i]: # iterate until LIKWID output + i = i+1 + if 'Number of OpenMP threads:' in lines[i] or (i == len(lines)-1): + multi_region_run = False + break + elif i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + if not multi_region_run: + break + benchmark_line = lines[i].split() + if 'Region' in lines[i]: + rg = benchmark_line[1].replace(',', '') + regions[rg] = 0 + else: + multi_region_run = False + benchmark = benchmark_line[-1] + + # save dataframe for previous benchmark (if there was) and create dataframe for current (new) benchmark + if benchmark not in benchmarks: + if len(benchmarks) > 0: + perf_results_df.to_csv( + os.path.join(path_to_perf_files, + file_prefix + filename_input + file_postfix + '_' + benchmarks[-1] + '.csv'), + index=False) + + benchmarks.append(benchmark) + # create empty data frame from column set + perf_results_df = pd.DataFrame(columns=cols) + # set data types for new data frame as defined above + for k, v in cols_dtypes.items(): + perf_results_df[k] = perf_results_df[k].astype(v) + + # number of LIKWID outputs gives number of used cores + cores = str.count(lines[i+2], 'HWThread') + + cores_used.append(cores) + + search_postfix = '' + if cores > 1: + 
search_postfix = ' STAT' + + while all([val + search_postfix not in lines[i] for val in search_terms_likwid.values()]): + i = i+1 + if i > len(lines)-1: + sys.exit( + 'End of file reached without finding output.') + + # extract benchmark result + regions[rg] = float(lines[i].split('|')[2].split()[0]) + + # with the number of used cores determined, check if + # line for this result is already present + row_setting_avail = ( + perf_results_df[cols_problem[0]] == problem) & ( + perf_results_df[cols_problem[1]] == nr_exp) & ( + perf_results_df[cols_problem[2]] == divideBy2) & ( + perf_results_df[cols_problem[3]] == mod_pk) & ( + perf_results_df[cols_cluster[0]] == nodes) & ( + perf_results_df[cols_cluster[1]] == ranks) & ( + perf_results_df[cols_cluster[2]] == cores) + + if row_setting_avail.values.sum() <= 1: # add new line + # add to all rows fix identifiers for: + # problem, exponent in r, modified coordinates, extrapolation + # number of nodes and ranks of the job + # as well as variable numbers for other columns + benchmark_unit = search_terms_likwid[benchmarks[-1]].split('[')[1][:-1] + perf_results_df.loc[len(perf_results_df.index), + cols_problem + cols_cluster + + cols_convergence + [reg + ' (' + benchmark_unit + ')' for reg in regions.keys()]] = [ + problem, nr_exp, divideBy2, mod_pk, extrapolation, + nodes, ranks, cores, its, norm2, norminf] + list(regions.values()) + # add timings + for time_col, time_val in time_dict.items(): + perf_results_df.loc[len( + perf_results_df.index)-1, time_col] = time_val + elif row_setting_avail.values.sum() > 1: # error + sys.exit( + 'Error. More than two lines corresponds to criterion.' + '\n\tTwo benchmarks for the exact same setting in output. 
Please check output file.') + + i += 1 + # end while + + # save file of last benchmark + perf_results_df.to_csv( + os.path.join(path_to_perf_files, + file_prefix + filename_input + file_postfix + '_' + benchmarks[-1] + '.csv'), + index=False) + # close file + + +if __name__ == "__main__": + # strong scaling + main(problem=6, divideBy2=7, mod_pk=1) + main(problem=6, divideBy2=7, mod_pk=2) + main(problem=7, divideBy2=7, mod_pk=1) + main(problem=7, divideBy2=7, mod_pk=2) + # large case strong scaling + main(problem=7, divideBy2=8, mod_pk=1) \ No newline at end of file diff --git a/plot_tools/plot_benchmarks_scaling.py b/plot_tools/plot_benchmarks_scaling.py deleted file mode 100644 index cafcf8a9..00000000 --- a/plot_tools/plot_benchmarks_scaling.py +++ /dev/null @@ -1,118 +0,0 @@ -import pandas as pd -import numpy as np -import sys -# Use backend to not plot on UI -# import matplotlib -# matplotlib.use('Agg') -import matplotlib.pyplot as plt -from os.path import join, exists, dirname -from os import makedirs - -### Plots scaling of FLOPS and Caches (saturation) scaling from 0 to n Cores -### as read from data frame - -colors = [ - [1.00, 0.49, 0.06], - [0.17, 0.63, 0.18], - [0.83, 0.15, 0.16], - [0.13, 0.47, 0.69], - [0.58, 0.40, 0.74], - [0.53, 0.35, 0.27], - [0.92, 0.46, 0.77], - [0.50, 0.50, 0.50], - [0.66, 0.85, 0.06], - [0.06, 0.85, 0.85], - [0.85, 0.15, 0.85], - [0.75, 0.75, 0.75]]; - -def plot_perf_per_core(path_out, fname, df, benchname, saturation_limit=0, colors=colors): - fontsize = 16 - - fig = plt.figure(figsize=(10, 10)) - ax = fig.add_subplot() - plt.plot(df['Cores'], df[benchname[0]]) - - if benchname[0] == 'CACHES': - plt.plot(df['Cores'], saturation_limit * np.ones(len(df['Cores'])), linestyle='dotted', linewidth=3, color=[0, 0, 0]) - ax.text(1, saturation_limit+3, 'Memory bandwith (AXPY) (' + str(saturation_limit) + ' GBytes/s)', fontsize=14) - ax.set_ylim(0, 90) - - - ax.set_title(benchname[1][0], fontsize=fontsize+6) - 
ax.set_ylabel(benchname[1][0], fontsize=fontsize) - - ax.set_xlabel('Number of cores used', fontsize=fontsize) - - - path_out = join(path_out, 'figures') - if not exists(path_out): - makedirs(path_out) - plt.savefig(join(path_out, fname + '_' + benchname[0].lower()), bbox_inches='tight') - plt.close() - - -def main(): - - problem = 5 - nr_exp = 4 - mod_pk = 1 - smoother = 3 - extrapolation = 1 - - nodes = 1 - ranks = 1 - maxCores = 14 - - ## saturation_limit is node specific and needs to be adapted. - saturation_limit = 80 - - fname = 'p' + str(problem) + '-r' + str(nr_exp) + '-mpk' + str(mod_pk) + '-s' + str( - smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) - path_to_files_rel = '' # relative to plot script - path_to_files = join(dirname(__file__), join(path_to_files_rel)) - - df = pd.read_csv( - join(path_to_files, fname + '_benchmarks.csv'), - dtype={'Problem': int, 'rExp': int, 'ModPK': int, - 'Extrapolation': int, 'Nodes': int, 'Ranks': int, - 'Cores': int, 'its': int}) - - # Likwid benchmark columns, more benchmarks are in timings. 
- likwid_benchmarks = {'FLOPS_DP': ['Flop performance in Multi-Threading', 'Flops (GFlops/s)'], 'CACHES': [ - 'Memory bandwidth saturation', 'Memory bandwidth (GBytes/s)']} # benchmark : [plot title, plot y-label] - timing_benchmarks = {'Total_execution_time' : ['Total execution time in Multi-Threading', 'Execution time']} - # Problem setting columns - setting_cols = ['Problem', 'rExp', 'ModPK', 'Extrapolation', 'Nodes', 'Ranks'] - - # check content - for bench in likwid_benchmarks.items(): - bench_rows = np.where(df[bench[0]].isnull()!=True)[0] - if len(bench_rows) > 0: - df_subframe = df.iloc[bench_rows].copy() - - # Check that the different number of threads/cores where only conducted - # on one particular setting of the above columns - if np.max(df_subframe.loc[:,setting_cols].nunique()) > 1: - sys.exit('Error in input data, more than one setting found.') - - # TODO or not TODO: If the same run was done multiple times with different LIKWID benchmarks - # it is not clear which line to take. - # This could be extended to take the weighted sum or minimum of all corresponding lines. - # However, these timings should be identical or in the same region... - # Nonetheless, there could be a nicer table format to store the results ;-) - cores_used = df_subframe['Cores'].unique() - if len(cores_used) != len(df_subframe['Cores']): - sys.exit('Error: Multiple times computed with the same number of threads.') - - plot_perf_per_core(path_to_files, fname, df_subframe, bench, saturation_limit=saturation_limit) - - # Plot particular timings from table for first benchmark. Timings should be similar for FLOPS_DP and CACHES. 
- if bench[0] == list(likwid_benchmarks.keys())[0]: - for timing_benchmark in timing_benchmarks.items(): - plot_perf_per_core(path_to_files, fname, df_subframe, timing_benchmark) - - - -if __name__ == '__main__': - main() - \ No newline at end of file diff --git a/plot_tools/read_output.py b/plot_tools/read_output.py deleted file mode 100644 index 6fbb502a..00000000 --- a/plot_tools/read_output.py +++ /dev/null @@ -1,190 +0,0 @@ -import pandas as pd -import numpy as np -import os -import sys - - -def main(): - - jobid = '1256' # provide correct slurm job id - problem = 5 - nr_exp = 4 - mod_pk = 1 - smoother = 3 - extrapolation = 1 - - nodes = 1 - ranks = 1 - maxCores = 14 - cores_used = [] # will be filled automatically - - path_to_perf_files_rel = '' # relative from read_output call - path_to_perf_files = os.path.join(os.path.dirname(__file__), os.path.join(path_to_perf_files_rel)) - - cols_problem = ['Problem', 'rExp', 'ModPK', 'Extrapolation'] - - cols_cluster = ['Nodes', 'Ranks', 'Cores'] - - cols_convergence = ['its', '2-norm of error', 'inf-norm of error'] - - cols_time = [] - cols_time += ['t_setup', 't_build', 't_facto_Ac', - 't_build_P', 't_build_Asc', 't_facto_Asc'] - cols_time += ['t_total_(fine)', 't_smoothing', 't_residual', - 't_restriction', 't_Ac', 't_prolongation', 't_fine_residual', - 't_error'] - cols_time += ['t_applyA'] - cols_time += ['t_coeff', 't_arr_art_att', 't_sol', 't_detDFinv', 't_trafo'] - cols_time += ['Total_execution_time'] - - cols_benchmark = ['FLOPS_DP', 'CACHES'] - - cols = cols_problem + cols_cluster + cols_convergence + cols_time + cols_benchmark - cols_dtypes = {col: 'int' for col in cols_problem + cols_cluster} - cols_dtypes.update( - {col: 'double' for col in cols_convergence + cols_time + cols_benchmark}) - - filename_input = 'p' + str(problem) + '-r' + str(nr_exp) + '-mpk' + str(mod_pk) + '-s' + str( - smoother) + '-e' + str(extrapolation) + '--N' + str(nodes) + '-R' + str(ranks) + '-maxC' + str(maxCores) - - err = 0 - 
with open(os.path.join(path_to_perf_files, 'slurm-' + str(jobid) + '-' + filename_input + '.err')) as f: - line = f.readline() - while line and err == 0: - line = f.readline() - if ' error' in line: - err = 1 - print('Error in job script.\n') - print(line) - - if err == 0: - - with open(os.path.join(path_to_perf_files, 'slurm-' + str(jobid) + '-' + filename_input + '.out')) as f: - - # create empty data frame from column set - perf_results_df = pd.DataFrame(columns=cols) - # set data types for new data frame as defined above - for k, v in cols_dtypes.items(): - perf_results_df[k] = perf_results_df[k].astype(v) - - lines = f.readlines() - i = 0 - while i < len(lines)-1: - - # search for next program execution - if '--> Iteration' in lines[i]: - while '--> Iteration' in lines[i] and '--> Iteration' in lines[i+1]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - # iteration scheme ended: - # split on empty spaces and take number of iterations in third place - its = int(lines[i].split()[2][:-1]) - while '-norm' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - norm2 = -1 - if '2-norm' in lines[i]: - norm2 = float(lines[i].split()[-1]) - else: - sys.exit('Error. 2-Norm information not available.') - norminf = -1 - if 'inf-norm' in lines[i+1]: - norminf = float(lines[i+1].split()[-1]) - else: - sys.exit('Error. 
Inf-Norm information not available.') - - while 't_setup' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - # global time print out, split line by line of information and add to dataframe - time_dict = dict() - while 'Total_execution_time' not in lines[i-1]: - timings = lines[i].split() - for j in range(0, len(timings), 2): - # remove last char (':') from string and convert timing to floating point - time_dict[timings[j][:-1] - ] = float(timings[j+1].replace(',', '')) - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - - # iterate until LIKWID printout - while 'Group 1: ' not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - benchmark = lines[i].split()[-1] - # number of LIKWID outputs gives number of used cores - cores = str.count(lines[i+2], 'HWThread') - - cores_used.append(cores) - - search_term_likwid = '' - if benchmark == 'FLOPS_DP': - search_term_likwid = 'DP [MFLOP/s]' - elif benchmark == 'CACHES': - search_term_likwid = 'Memory bandwidth [MBytes/s]' - else: - sys.exit('Error. 
Benchmark type not supported.') - - if cores > 1: - search_term_likwid = search_term_likwid + ' STAT' - - while search_term_likwid not in lines[i]: - i = i+1 - if i > len(lines)-1: - sys.exit( - 'End of file reached without finding output.') - - benchmark_result = float(lines[i].split('|')[2].split()[0]) - - # with the number of used cores determined, check if - # line for this result is already present - row_setting_avail = ( - perf_results_df[cols_problem[0]] == problem) & ( - perf_results_df[cols_problem[1]] == nr_exp) & ( - perf_results_df[cols_problem[2]] == mod_pk) & ( - perf_results_df[cols_cluster[0]] == nodes) & ( - perf_results_df[cols_cluster[1]] == ranks) & ( - perf_results_df[cols_cluster[2]] == cores) - - if row_setting_avail.values.sum() <= 1: # add new line - # add to all rows fix identifiers for: - # problem, exponent in r, modified coordinates, extrapolation - # number of nodes and ranks of the job - # as well as variable numbers for other columns - perf_results_df.loc[len(perf_results_df.index), - cols_problem + cols_cluster + - cols_convergence + [benchmark]] = [ - problem, nr_exp, mod_pk, extrapolation, - nodes, ranks, cores, its, norm2, norminf, - benchmark_result] - # add timings - for time_col, time_val in time_dict.items(): - perf_results_df.loc[len( - perf_results_df.index)-1, time_col] = time_val - elif row_setting_avail.values.sum() > 1: # error - sys.exit( - 'Error. More than two lines corresponds to criterion.' 
- ' We can only have one line for each benchmark: FLOPS_DP and CACHES.') - - i += 1 - # end while - - perf_results_df.to_csv( - os.path.join( - path_to_perf_files, filename_input + '_benchmarks.csv'), - index=False) - # close file - - -if __name__ == "__main__": - main() diff --git a/src/create_grid_polar.cpp b/src/create_grid_polar.cpp index 2d348e76..ed497b8d 100644 --- a/src/create_grid_polar.cpp +++ b/src/create_grid_polar.cpp @@ -1,4 +1,4 @@ -/* +/* * Copyright (C) 2019-2023 The GMGPolar Development Team * * Authors: Philippe Leleux, Christina Schwarz, Martin J. Kühn, Carola Kruse, Ulrich Rüde @@ -141,12 +141,29 @@ void level::build_r() // edge int se; - if (gyro::icntl[Param::alpha_coeff] == 0 || gyro::icntl[Param::alpha_coeff] == 2) { - se = floor(nr * 0.66) - n_elems_refined / 2; - } - else { - se = floor(nr * 0.5) - n_elems_refined / 2; + + // allow refining of the grid at r_jump, the center point of the + // drop of the diffusion coefficient alpha. + double r_jump; + if (gyro::icntl[Param::alpha_coeff] == SONNENDRUCKER) { + // The center of the coefficient jump lies at 0.6888697651782026 + // for backward stability with previous runs and the Matlab code, + // we use 0.66 though. + r_jump = 0.66; + } else if (gyro::icntl[Param::alpha_coeff] == ZONI) { + r_jump = 0.4837; + } else if (gyro::icntl[Param::alpha_coeff] == ZONI_SHIFTED) { + // Choose center point of descent. 
+ // a) - ln(0.5 * (alpha(0) - alpha(Rmax))): + // - ln(0.5 * (np.exp(-np.tanh(-14)) - np.exp(-np.tanh(6)))) = 0.16143743821247852 + // b) r_center = Rmax * (np.arctanh(0.16143743821247852) + 14) / 20 = 0.7081431124450334 Rmax + r_jump = 0.7081; + } else if (gyro::icntl[Param::alpha_coeff] == POISSON) { + r_jump = 0.5; // There is no jump for Poisson so this is an arbitrary choice + } else { + throw std::runtime_error("Unknown alpha coeff"); } + se = floor(nr * r_jump) - n_elems_refined / 2; int ee = se + n_elems_refined; // takeout int st = ceil((double)n_elems_refined / 4.0 + 1) - 1; diff --git a/src/gmgpolar.cpp b/src/gmgpolar.cpp index bde3925b..dea7adc1 100644 --- a/src/gmgpolar.cpp +++ b/src/gmgpolar.cpp @@ -67,7 +67,7 @@ void gmgpolar::reset_timers() t_build_P = 0; t_build_Asc = 0; t_facto_Asc = 0; - t_total = 0; + t_total_mgcycle = 0; t_smoothing = 0; t_residual = 0; t_restriction = 0; diff --git a/src/gyro.cpp b/src/gyro.cpp index 23bfdb57..d70ec556 100644 --- a/src/gyro.cpp +++ b/src/gyro.cpp @@ -362,11 +362,8 @@ std::vector gyro::def_solution_rt(double r_i, std::vector theta, } /* ----- end of gyro::eval_def_solution_vec ----- */ /*************************************************************************** - * Diffusivity and operator - **************************************************************************/ -/************************ - * Single - ************************/ +* Diffusivity and operator +**************************************************************************/ /*! * \brief Diffusivity coefficient * @@ -380,7 +377,7 @@ std::vector gyro::def_solution_rt(double r_i, std::vector theta, * \return the coefficient * */ -double gyro::coeff(double r, int verbose) +double gyro::coeff_alpha(double r, int verbose) { double t; TIC; @@ -398,11 +395,7 @@ double gyro::coeff(double r, int verbose) } /* ----- end of level::coeff ----- */ /*! 
- * \brief Diffusivity coefficient - * - * Computes the diffusivity coefficient depending on the radius r - * - for prob 3 or 5: alpha(r)=(2.0/(2.6+3.14)) * (1.3 + atan((1-1.3*r/Rmax) / 0.09)) - * - else: 1 + * \brief Beta coefficient * * \param r: the r coordinate * \param verbose: verbose level for debug @@ -440,7 +433,7 @@ double gyro::coeff_beta(double r, int verbose) * \return the coefficient * */ -std::vector gyro::coeff(std::vector r, int verbose) +std::vector gyro::coeff_alpha(std::vector r, int verbose) { double t; TIC; @@ -460,11 +453,7 @@ std::vector gyro::coeff(std::vector r, int verbose) } /* ----- end of level::coeff ----- */ /*! - * \brief Diffusivity coefficient - * - * Computes the diffusivity coefficient depending on the radius r - * - for prob 3 or 5: alpha(r)=(2.0/(2.6+3.14)) * (1.3 + atan((1-1.3*r/Rmax) / 0.09)) - * - else: 1 + * \brief Beta coefficient * * \param r: the r coordinate * \param verbose: verbose level for debug @@ -529,6 +518,7 @@ double gyro::detDFinv(double r, double theta, int verbose) } return detDFinv_r; } /* ----- end of level::detDFinv ----- */ + double gyro::arr(double r, double theta, int verbose) { double t; @@ -539,7 +529,7 @@ double gyro::arr(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtt = functions->J_tt(r, theta, kappa_eps, delta_e, Rmax); double arr_r = 0.5 * (Jtt * Jtt + Jrt * Jrt) * coeff_r / fabs(detDFinv_r); @@ -551,6 +541,7 @@ double gyro::arr(double r, double theta, int verbose) } return arr_r; } /* ----- end of level::arr ----- */ + double gyro::art(double r, double theta, int verbose) { double t; @@ -561,7 +552,7 @@ double gyro::art(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, 
verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); @@ -575,6 +566,7 @@ double gyro::art(double r, double theta, int verbose) } return art_r; } /* ----- end of level::art ----- */ + double gyro::att(double r, double theta, int verbose) { double t; @@ -585,7 +577,7 @@ double gyro::att(double r, double theta, int verbose) double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); double att_r = 0.5 * (Jtr * Jtr + Jrr * Jrr) * coeff_r / fabs(detDFinv_r); @@ -597,6 +589,7 @@ double gyro::att(double r, double theta, int verbose) } return att_r; } /* ----- end of level::att ----- */ + void gyro::arr_att_art(double r, double theta, double& arr, double& att, double& art, int verbose) { double t; @@ -607,7 +600,7 @@ void gyro::arr_att_art(double r, double theta, double& arr, double& att, double& double Rmax = dcntl[Param::R]; double detDFinv_r = detDFinv(r, theta, verbose); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); double Jrr = functions->J_rr(r, theta, kappa_eps, delta_e, Rmax); double Jrt = functions->J_rt(r, theta, kappa_eps, delta_e, Rmax); double Jtr = functions->J_tr(r, theta, kappa_eps, delta_e, Rmax); @@ -741,7 +734,7 @@ std::vector gyro::arr(double r, std::vector theta, std::vector Jtt(ntheta); functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { arr_r[i] = 0.5 * (Jtt[i] * 
Jtt[i] + Jrt[i] * Jrt[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -777,7 +770,7 @@ std::vector gyro::art(double r, std::vector theta, std::vectorJ_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { art_r[i] = -0.25 * (Jtt[i] * Jtr[i] + Jrt[i] * Jrr[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -811,7 +804,7 @@ std::vector gyro::att(double r, std::vector theta, std::vector Jtt(ntheta); functions->J_rr(r, theta, kappa_eps, delta_e, Rmax, Jrr, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); ; for (int i = 0; i < ntheta; i++) { att_r[i] = 0.5 * (Jtr[i] * Jtr[i] + Jrr[i] * Jrr[i]) * coeff_r / fabs(detDFinv_r[i]); @@ -850,7 +843,7 @@ void gyro::arr_att_art(double r, std::vector theta, std::vector functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt, sin_theta, cos_theta); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, Jtr, sin_theta, cos_theta); functions->J_tt(r, theta, kappa_eps, delta_e, Rmax, Jtt, sin_theta, cos_theta); - double coeff_r = coeff(r, verbose); + double coeff_r = coeff_alpha(r, verbose); for (int i = 0; i < ntheta; i++) { double coeff = coeff_r / fabs(detDFinv_r[i]); arr[i] = 0.5 * (Jtt[i] * Jtt[i] + Jrt[i] * Jrt[i]) * coeff; @@ -888,7 +881,7 @@ void gyro::arr_att_art(std::vector r, double theta, std::vector& std::vector Jrt(size); std::vector Jtr(size); std::vector Jtt(size); - std::vector coeff_r = coeff(r, verbose); + std::vector coeff_r = coeff_alpha(r, verbose); functions->J_rr(r, theta, kappa_eps, delta_e, Rmax, Jrr); functions->J_rt(r, theta, kappa_eps, delta_e, Rmax, Jrt); functions->J_tr(r, theta, kappa_eps, delta_e, Rmax, 
Jtr); diff --git a/src/main.cpp b/src/main.cpp index 3b1c8b9e..5e18b313 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -53,6 +53,10 @@ */ int main(int argc, char* argv[]) { +#ifdef GMGPOLAR_USE_LIKWID + LIKWID_MARKER_INIT; +#endif + int error = 0; //////////////////////////////////////////////////////////////////////////////// @@ -227,7 +231,7 @@ int main(int argc, char* argv[]) error = 1; } // if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC + std::cout << "Total execution time: " << TOC << "\n---------------------------------------------------------------------------\n"; } @@ -287,9 +291,20 @@ int main(int argc, char* argv[]) TIC; gmgpolar gmg; try { +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("create_grid_polar"); +} +#endif // Create polar grids gmg.create_grid_polar(); - +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("create_grid_polar"); +} +#endif // Solve using multigrid gmg.polar_multigrid(); } @@ -305,7 +320,7 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; std::cout << "\n\n\n\n\n"; @@ -375,7 +390,7 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; } // } } @@ -454,13 +469,17 @@ int main(int argc, char* argv[]) error = 1; } if (gyro::icntl[Param::verbose] > 0) - std::cout << "Total_execution_time: " << TOC << "\n"; + std::cout << "Total execution time: " << TOC << "\n"; } } } } } } - + +#ifdef GMGPOLAR_USE_LIKWID + LIKWID_MARKER_CLOSE; +#endif + return error; } /* ----- end of main ----- */ diff --git a/src/multigrid_iter.cpp b/src/multigrid_iter.cpp index b33b39a5..4e5b960e 100644 --- a/src/multigrid_iter.cpp +++ b/src/multigrid_iter.cpp @@ -104,6 +104,12 @@ void 
gmgpolar::multigrid_iter() t_fine_residual += TOC; TIC; +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("Iteration"); +} +#endif //! Start the Multigrid-Iteration while (it < gyro::icntl[Param::maxiter] && convergence_criterium > rel_red_conv) { it++; @@ -187,7 +193,13 @@ void gmgpolar::multigrid_iter() } t_fine_residual += TOC; TIC; - } + } +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("Iteration"); +} +#endif if (gyro::icntl[Param::verbose] > 0) { if (it == gyro::icntl[Param::maxiter]) { std::cout << "Multigrid reached maxiter=" << gyro::icntl[Param::maxiter] << "\n"; @@ -295,7 +307,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->multigrid_smoothing0(smoother); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of pre-smoother: " << smoother << " (" << TOC << " s)\n"; } } else { @@ -320,7 +332,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->dep_Asc_ortho[smoother]); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of pre-smoother: " << smoother << " (" << TOC << " s)\n"; } } } // omp single @@ -341,21 +353,25 @@ void gmgpolar::multigrid_cycle_extrapol(int l) } //std::cout << "pre-smoothing done \n"; - if (gyro::icntl[Param::verbose] > 5) - gyro::disp(v_level[l]->u, "u"); - t = t_smoothing_tmp; t_smoothing += TOC; + + if (gyro::icntl[Param::verbose] > 5){ // no timing of large output + gyro::disp(v_level[l]->u, "u"); + } + TIC; //! compute residual (of level l) //even if we have extrapolation, compute just normal residual (extrapolation-restriction follows in the next step) gmgpolar::compute_residual(l, 0); - if (gyro::icntl[Param::verbose] > 5) + t_residual += TOC; + + if (gyro::icntl[Param::verbose] > 5){ // no timing of large output gyro::disp(v_level[l]->res, "res"); + } - t_residual += TOC; TIC; //! 
Restriction of residual (coarsening) @@ -411,9 +427,14 @@ void gmgpolar::multigrid_cycle_extrapol(int l) if (gyro::icntl[Param::matrix_free] == 1) { if (gyro::icntl[Param::optimized] == 0) + { v_level[l + 1]->apply_A0(u_coarse, Au_coarse); - else + }else{ + double start = omp_get_wtime(); v_level[l + 1]->apply_A(u_coarse, Au_coarse); + double end = omp_get_wtime(); + t_applyA += (end - start); + } } else { if (gyro::icntl[Param::openmp] == 1) { @@ -471,12 +492,13 @@ void gmgpolar::multigrid_cycle_extrapol(int l) t_restriction += TOC; TIC; - //! iterative call of multigrid_cycle_extrapol - v_level[l + 1]->u.assign(v_level[l + 1]->m, 0); //zero u in every iteration + //! recursive call of multigrid_cycle_extrapol + v_level[l + 1]->u.assign(v_level[l + 1]->m, 0); // zero u in every iteration std::vector error_coarse; if (l == levels - 2) { -//solve exactly on the coarsest level for the error (A * error = res) (use whole A from coarsest level) -// check for the second coarsest level (levels-2), because we have no smoothing on the coarsest level (and thus no Asc and no Asc_ortho) + // exact solve on the coarsest level for the error (A * error = res) (use whole A from coarsest level) + // check for the second to coarsest level (levels-2), as no smoothing on the coarsest level exists + // (and thus no Asc and no Asc_ortho) #ifdef GMGPOLAR_USE_MUMPS if (gyro::icntl[Param::optimized] == 0) { #endif @@ -492,7 +514,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) } else { multigrid_cycle_extrapol(l + 1); - error_coarse = v_level[l + 1]->u; //the coarse_error on level l is u on level l+1 + error_coarse = v_level[l + 1]->u; // the coarse_error on level l is u on level l+1 TIC; } @@ -588,7 +610,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->multigrid_smoothing0(smoother); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of post-smoother: " << smoother << " (" << TOC << " 
s)\n"; } } else { @@ -613,7 +635,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) v_level[l]->dep_Asc_ortho[smoother]); if (gyro::icntl[Param::verbose] > 4) - std::cout << "SMOOTHER: " << smoother << " = " << TOC << "\n"; + std::cout << "Finishing running of post-smoother: " << smoother << " ( " << TOC << " s)\n"; } } } // omp single @@ -626,7 +648,7 @@ void gmgpolar::multigrid_cycle_extrapol(int l) TIC; t = t_total_tmp; - t_total += TOC; + t_total_mgcycle += TOC; if (gyro::icntl[Param::verbose] > 5) gyro::disp(v_level[l]->u, "u"); @@ -653,7 +675,6 @@ void gmgpolar::compute_residual(int l, int extrapol) else { double start = omp_get_wtime(); v_level[l]->apply_A(v_level[l]->u, Au); - double end = omp_get_wtime(); t_applyA += (end - start); } @@ -725,10 +746,15 @@ void gmgpolar::compute_residual(int l, int extrapol) //apply A(l+1) to Pu std::vector APu(v_level[l + 1]->m, 0); if (gyro::icntl[Param::matrix_free] == 1) { - if (gyro::icntl[Param::optimized] == 0) + if (gyro::icntl[Param::optimized] == 0) { v_level[l + 1]->apply_A0(Pu, APu); //APu = A(l+1) * Pu - else + } + else { + double start = omp_get_wtime(); v_level[l + 1]->apply_A(Pu, APu); //APu = A(l+1) * Pu + double end = omp_get_wtime(); + t_applyA += (end - start); + } } else { if (gyro::icntl[Param::openmp] == 1) { diff --git a/src/polar_multigrid.cpp b/src/polar_multigrid.cpp index 8c79e02f..ab0f5add 100644 --- a/src/polar_multigrid.cpp +++ b/src/polar_multigrid.cpp @@ -131,10 +131,23 @@ void gmgpolar::polar_multigrid() std::cout << "t_applyA: " << t_applyA << std::endl; } else { - if (gyro::icntl[Param::verbose] > 3) + if (gyro::icntl[Param::verbose] > 3){ std::cout << "Building discretized system, restriction and interpolation operators, and defining splittings...\n"; + } +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_START("Setup"); +} +#endif prepare_op_levels(); +#ifdef GMGPOLAR_USE_LIKWID +#pragma omp parallel +{ + LIKWID_MARKER_STOP("Setup"); +} +#endif int m = v_level[0]->m; 
@@ -171,34 +184,41 @@ void gmgpolar::polar_multigrid() if (gyro::icntl[Param::verbose] > 1) { for (int l = 0; l < levels; l++) { std::cout << "LEVEL " << l << "\n"; - std::cout << "\nt_smoothing: " << v_level[l]->t_smoothing << ", t_f_sc: " << v_level[l]->t_f_sc + std::cout << "\tSmoothing: " << v_level[l]->t_smoothing << ", t_f_sc: " << v_level[l]->t_f_sc << ", t_Asc_ortho: " << v_level[l]->t_Asc_ortho << ", t_Asc: " << v_level[l]->t_Asc << "\n"; - std::cout << "\nt_get_ptr: " << v_level[l]->t_get_ptr + std::cout << "\tt_get_ptr: " << v_level[l]->t_get_ptr << ", t_get_stencil: " << v_level[l]->t_get_stencil << ", t_get_smoother: " << v_level[l]->t_get_smoother - << ", t_get_row: " << v_level[l]->t_get_row << "\n"; + << ", t_get_row: " << v_level[l]->t_get_row; std::cout << "\n"; } } if (gyro::icntl[Param::verbose] > 1) { - std::cout << "\nt_setup: " << t_setup << ", t_build: " << t_build << ", t_facto_Ac: " << t_facto_Ac - << ", t_build_P: " << t_build_P << ", t_build_Asc: " << t_build_Asc - << ", t_facto_Asc: " << t_facto_Asc << "\n"; - std::cout << "t_total_(fine): " << t_total << ", t_smoothing: " << t_smoothing - << ", t_residual: " << t_residual << ", t_restriction: " << t_restriction - << ", t_Ac: " << t_Ac << ", t_prolongation: " << t_prolongation - << ", t_fine_residual: " << t_fine_residual << ", t_error: " << t_error << "\n"; - std::cout << "t_applyA: " << t_applyA << std::endl; + std::cout << "\nTotal setup: " << t_setup << "\n\tBuilding system matrix A and RHS: " << t_build + << "\n\tFactorization of coarse operator Ac: " << t_facto_Ac << "\n\tBuilding intergrid operators (e.g. 
projections): " << t_build_P + << "\n\tBuilding smoothing operators A_sc: " << t_build_Asc << "\n\tFactorizing smoothing operators A_sc: " << t_facto_Asc << "\n"; + std::cout << "Total multigrid cycle: " << t_total_mgcycle << "\n\tComplete smoothing: " << t_smoothing + << "\n\tComputing residual: " << t_residual << "\n\tApplying restriction: " << t_restriction + << "\n\tSolve coarse system: " << t_Ac << "\n\tApplying prolongation (+ coarse grid correction): " << t_prolongation + << "\nComputing residual on finest level: " << t_fine_residual; + if (gyro::icntl[Param::check_error] == 1) { + std::cout << "\nComputing final error: " << t_error; + } + std::cout << "\nTotal application of A: " << t_applyA; + std::cout << "\n"; } if (gyro::icntl[Param::verbose] > 1) { - std::cout << "\nt_coeff: " << gyro::dcntl[Param::t_coeff] - << ", t_arr_art_att: " << gyro::dcntl[Param::t_arr_art_att] - << ", t_sol: " << gyro::dcntl[Param::t_sol] - << ", t_detDFinv: " << gyro::dcntl[Param::t_detDFinv] - << ", t_trafo: " << gyro::dcntl[Param::t_trafo] << "\n"; + std::cout << "\nEvaluation of arr, art, and att: " << gyro::dcntl[Param::t_arr_art_att] + << "\n\tEvaluation of alpha and beta: " << gyro::dcntl[Param::t_coeff] + << "\n\tComputing determinant of Jacobian of inverse mapping: " << gyro::dcntl[Param::t_detDFinv]; + if (gyro::icntl[Param::check_error] == 1) { + std::cout << "\nComputing exact solution: " << gyro::dcntl[Param::t_sol]; + } + + std::cout << "\n"; } } } diff --git a/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp b/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp index 12d4525d..92a37a45 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedCircular.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedCircular::rho_pole(double r, std::vector } /*........................................*/ double CartesianR2GyroZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals 
alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2GyroZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp b/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp index e0069469..8d992420 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedShafranov::rho_pole(double r, std::vector } /*........................................*/ double CartesianR2GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp b/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp index 863db829..b37c88e1 100644 --- a/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR2GyroZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void CartesianR2GyroZoniShiftedTriangular::rho_pole(double r, std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedCircular.cpp b/src/test_cases/CartesianR2ZoniShiftedCircular.cpp index 90b7a50c..7c7c31d9 100644 --- a/src/test_cases/CartesianR2ZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedCircular.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double CartesianR2ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp b/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp index dd21e61f..6af2ef5d 100644 --- a/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double CartesianR2ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp b/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp index 6c78e82e..21f2d195 100644 --- a/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR2ZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void CartesianR2ZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double CartesianR2ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR2ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp b/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp index 59c024e1..9e9c4700 100644 --- a/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR6GyroZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void CartesianR6GyroZoniShiftedShafranov::rho_pole(double r, std::vector } /*........................................*/ double CartesianR6GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedCircular.cpp b/src/test_cases/CartesianR6ZoniShiftedCircular.cpp index ff90dfd1..0ce60df7 100644 --- a/src/test_cases/CartesianR6ZoniShiftedCircular.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double CartesianR6ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp b/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp index 390a3d20..c225079d 100644 --- a/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double CartesianR6ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp b/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp index f6eb3ed4..7a8098e5 100644 --- a/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp +++ b/src/test_cases/CartesianR6ZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void CartesianR6ZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double CartesianR6ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void CartesianR6ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp b/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp index 858a7f5e..fc0e8a60 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedCircular::rho_pole(double r, std::vector cons } /*........................................*/ double PolarR6GyroZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp b/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp index a1b72161..f3743745 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedCulham.cpp @@ -572,12 +572,12 @@ void PolarR6GyroZoniShiftedCulham::J_yq(double r, std::vector const& the } /*........................................*/ double PolarR6GyroZoniShiftedCulham::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedCulham::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp b/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp index 99d7520e..7975c455 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double PolarR6GyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp b/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp index fd0e8603..92ed8b7a 100644 --- a/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp +++ b/src/test_cases/PolarR6GyroZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void PolarR6GyroZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double PolarR6GyroZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6GyroZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedCircular.cpp b/src/test_cases/PolarR6ZoniShiftedCircular.cpp index 3cbf6e23..969760b1 100644 --- a/src/test_cases/PolarR6ZoniShiftedCircular.cpp +++ b/src/test_cases/PolarR6ZoniShiftedCircular.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedCircular::rho_pole(double r, std::vector const& t } /*........................................*/ double PolarR6ZoniShiftedCircular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedCircular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedShafranov.cpp b/src/test_cases/PolarR6ZoniShiftedShafranov.cpp index 473d174d..45be29e6 100644 --- a/src/test_cases/PolarR6ZoniShiftedShafranov.cpp +++ b/src/test_cases/PolarR6ZoniShiftedShafranov.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedShafranov::rho_pole(double r, std::vector const& } /*........................................*/ double PolarR6ZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/PolarR6ZoniShiftedTriangular.cpp b/src/test_cases/PolarR6ZoniShiftedTriangular.cpp index 437fc5c7..626d1eb0 100644 --- a/src/test_cases/PolarR6ZoniShiftedTriangular.cpp +++ b/src/test_cases/PolarR6ZoniShiftedTriangular.cpp @@ -258,12 +258,12 @@ void PolarR6ZoniShiftedTriangular::rho_pole(double r, std::vector const& } /*........................................*/ double PolarR6ZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void PolarR6ZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/README.md b/src/test_cases/README.md new file mode 100644 index 00000000..6a9d9e60 --- /dev/null +++ b/src/test_cases/README.md @@ -0,0 +1,16 @@ +The following files define the geometry, coefficients of the ordinary differential equation and the manufactured solution. We always solve + +$-\nabla \cdot \left( \alpha \nabla u \right) + \beta u = f\quad \text{ in }\quad \Omega$ + +The naming is defined in `constants.h` and it follows the following rules: + +- The first part, e.g., `PolarR6` or `CartesianR6`, defines the manufactured solution. +- The **potential** second part `Gyro` or empty, defines if the $\beta$ coefficient is nontrivial or zero. +- The following part, e.g., `Sonnendrucker` or `Poisson`, defines the value of the $\alpha$ coefficient. +- The last part, e.g., `Shafranov` or `Triangular`, defines the geometry to be used + +For the particular solutions, see: +- Bourne et al. - Solver comparison for Poisson-like equations on tokamak geometries (2023) https://doi.org/10.1016/j.jcp.2023.112249 +- Kuehn, Kruse, Ruede - Implicitly extrapolated geometric multigrid on disk-like domains for the gyrokinetic Poisson equation from fusion plasma applications (2022) https://doi.org/10.1007/s10915-022-01802-1 + +A great thanks to Emily Bourne for preparing these scripts! 
\ No newline at end of file diff --git a/src/test_cases/RefinedGyroZoniShiftedCulham.cpp b/src/test_cases/RefinedGyroZoniShiftedCulham.cpp index b3d73047..7dccf508 100644 --- a/src/test_cases/RefinedGyroZoniShiftedCulham.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedCulham.cpp @@ -571,12 +571,12 @@ void RefinedGyroZoniShiftedCulham::J_yq(double r, std::vector const& the } /*........................................*/ double RefinedGyroZoniShiftedCulham::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedCulham::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp b/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp index 25e39975..6966f995 100644 --- a/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedShafranov.cpp @@ -257,12 +257,12 @@ void RefinedGyroZoniShiftedShafranov::rho_pole(double r, std::vector con } /*........................................*/ double RefinedGyroZoniShiftedShafranov::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedShafranov::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. 
https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp b/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp index d5c4dc39..0f2d14cf 100644 --- a/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp +++ b/src/test_cases/RefinedGyroZoniShiftedTriangular.cpp @@ -257,12 +257,12 @@ void RefinedGyroZoniShiftedTriangular::rho_pole(double r, std::vector co } /*........................................*/ double RefinedGyroZoniShiftedTriangular::coeffs1(double r, double Rmax) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 return exp(-tanh(20.0 * (r/Rmax) - 14.0)); } /*........................................*/ void RefinedGyroZoniShiftedTriangular::coeffs1(std::vector const& r, double Rmax, std::vector& sol) const -{ +{ // With Rmax=1, equals alpha(r) from equation (18) of Bourne et al. https://doi.org/10.1016/j.jcp.2023.112249 for (std::size_t i=0; i < sol.size(); ++i) { sol[i] = exp(-tanh(20.0 * (r[i]/Rmax) - 14.0)); diff --git a/treat_output.py b/treat_output.py index 8da4d0b8..a2f9e1a3 100644 --- a/treat_output.py +++ b/treat_output.py @@ -99,7 +99,7 @@ latex.write(error_norm_inf) latex.write(" & ") - time = line.find('Total_execution_time:') + time = line.find('Total execution time:') if (time != -1): #print(f'{line}') #execution time is found at: time+22, until the end