diff --git a/README.md b/README.md index 996c76a..99dc908 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ForMatmul -**ForMatmul**: A Fortran library that overloads the `matmul` function to enable efficient matrix multiplication with coarray. +**ForMatmul**: A Fortran library that overloads the `matmul` function to enable efficient matrix multiplication with/without coarray. ## Usage @@ -118,13 +118,13 @@ To set the stack size to unlimited, use the following command: `ulimit -s unlimi **Intel Fortran Compiler (ifort)** ```shell -fpm run --example benchmark3 --compiler ifort --flag "-Ofast -xHost -qopenmp -qmkl -coarray -coarray-num-images=4 -DUSE_COARRAY" +fpm run --example benchmark3 --compiler ifort --flag "-Ofast -mtune=native -xHost -qmkl -qopenmp -ipo -coarray -coarray-num-images=4 -DUSE_COARRAY" ``` **Intel Fortran Compiler (ifx)** ```shell -fpm run --example benchmark3 --compiler ifx --flag "-Ofast -xHost -qopenmp -qmkl -coarray -coarray-num-images=4 -DUSE_COARRAY" +fpm run --example --all --compiler ifx --flag "-Ofast -mtune=native -xHost -qmkl -qopenmp -coarray -coarray-num-images=4 -DUSE_COARRAY" ``` You can then use the provided Python script to generate visual plots for the benchmark3 data: @@ -133,31 +133,27 @@ You can then use the provided Python script to generate visual plots for the ben python benchmark/benchmark3_co.py ``` -Results obtained on an `Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz` using `ifort (IFORT) 2021.10.0 20230609` are as follows: +Results obtained on an `Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz` using `ifort (IFORT) 2021.11.0 20231010` are as follows: - with `-coarray-num-images=4`, `MKL_NUM_THREADS=1` and `OMP_NUM_THREADS=1`: -ForMatmul +ForMatmul +ForMatmul - with `-coarray-num-images=4` and Multithread: -ForMatmul +ForMatmul +ForMatmul - with `-coarray-num-images=5`, `MKL_NUM_THREADS=1` and `OMP_NUM_THREADS=1`: -ForMatmul +ForMatmul +ForMatmul - with `-coarray-num-images=5` and Multithread: -ForMatmul - -- with `-coarray-num-images=6`, `MKL_NUM_THREADS=1` and `OMP_NUM_THREADS=1`: - -ForMatmul - -- with `-coarray-num-images=6` and Multithread: - -ForMatmul +ForMatmul +ForMatmul ## API documentation diff --git a/benchmark/multithread/benchmark3a_nim4.png b/benchmark/multithread/benchmark3a_nim4.png deleted file mode 100644 index 12f04a7..0000000 Binary files a/benchmark/multithread/benchmark3a_nim4.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3a_nim5.png b/benchmark/multithread/benchmark3a_nim5.png deleted file mode 100644 index d1b3fb0..0000000 Binary files a/benchmark/multithread/benchmark3a_nim5.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3a_nim6.png b/benchmark/multithread/benchmark3a_nim6.png deleted file mode 100644 index 156f4e7..0000000 Binary files a/benchmark/multithread/benchmark3a_nim6.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3b_nim4.png b/benchmark/multithread/benchmark3b_nim4.png deleted file mode 100644 index d702096..0000000 Binary files a/benchmark/multithread/benchmark3b_nim4.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3b_nim5.png b/benchmark/multithread/benchmark3b_nim5.png deleted file mode 100644 index 594fdc7..0000000 Binary files a/benchmark/multithread/benchmark3b_nim5.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3b_nim6.png b/benchmark/multithread/benchmark3b_nim6.png deleted file mode 100644 index 4e8bc0a..0000000 Binary files a/benchmark/multithread/benchmark3b_nim6.png and /dev/null differ diff --git a/benchmark/multithread/benchmark3p_nim4.png b/benchmark/multithread/benchmark3p_nim4.png new file mode 100644 index 0000000..9917aec Binary files /dev/null and b/benchmark/multithread/benchmark3p_nim4.png differ diff --git a/benchmark/multithread/benchmark3p_nim5.png b/benchmark/multithread/benchmark3p_nim5.png new file mode 100644 index 0000000..d0e2c69 Binary files /dev/null and b/benchmark/multithread/benchmark3p_nim5.png differ diff --git a/benchmark/multithread/benchmark3t_nim4.png b/benchmark/multithread/benchmark3t_nim4.png new file mode 100644 index 0000000..b0a3c83 Binary files /dev/null and b/benchmark/multithread/benchmark3t_nim4.png differ diff --git a/benchmark/multithread/benchmark3t_nim5.png b/benchmark/multithread/benchmark3t_nim5.png new file mode 100644 index 0000000..4d3b556 Binary files /dev/null and b/benchmark/multithread/benchmark3t_nim5.png differ diff --git a/benchmark/singlethread/benchmark3a_nim4.png b/benchmark/singlethread/benchmark3a_nim4.png deleted file mode 100644 index 4741e86..0000000 Binary files a/benchmark/singlethread/benchmark3a_nim4.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3a_nim5.png b/benchmark/singlethread/benchmark3a_nim5.png deleted file mode 100644 index 4d88a3f..0000000 Binary files a/benchmark/singlethread/benchmark3a_nim5.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3a_nim6.png b/benchmark/singlethread/benchmark3a_nim6.png deleted file mode 100644 index c2686c2..0000000 Binary files a/benchmark/singlethread/benchmark3a_nim6.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3b_nim4.png b/benchmark/singlethread/benchmark3b_nim4.png deleted file mode 100644 index fb12695..0000000 Binary files a/benchmark/singlethread/benchmark3b_nim4.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3b_nim5.png b/benchmark/singlethread/benchmark3b_nim5.png deleted file mode 100644 index 065c9f3..0000000 Binary files a/benchmark/singlethread/benchmark3b_nim5.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3b_nim6.png b/benchmark/singlethread/benchmark3b_nim6.png deleted file mode 100644 index 29f6ea4..0000000 Binary files a/benchmark/singlethread/benchmark3b_nim6.png and /dev/null differ diff --git a/benchmark/singlethread/benchmark3p_nim4.png b/benchmark/singlethread/benchmark3p_nim4.png new file mode 100644 index 0000000..cc28941 Binary files /dev/null and b/benchmark/singlethread/benchmark3p_nim4.png differ diff --git a/benchmark/singlethread/benchmark3p_nim5.png b/benchmark/singlethread/benchmark3p_nim5.png new file mode 100644 index 0000000..e81db39 Binary files /dev/null and b/benchmark/singlethread/benchmark3p_nim5.png differ diff --git a/benchmark/singlethread/benchmark3t_nim4.png b/benchmark/singlethread/benchmark3t_nim4.png new file mode 100644 index 0000000..fbdcf41 Binary files /dev/null and b/benchmark/singlethread/benchmark3t_nim4.png differ diff --git a/benchmark/singlethread/benchmark3t_nim5.png b/benchmark/singlethread/benchmark3t_nim5.png new file mode 100644 index 0000000..9e691e7 Binary files /dev/null and b/benchmark/singlethread/benchmark3t_nim5.png differ diff --git a/example/benchmark3.f90 b/example/benchmark3.f90 index 65a3dd7..4c7c19d 100644 --- a/example/benchmark3.f90 +++ b/example/benchmark3.f90 @@ -40,7 +40,7 @@ program benchmark3 #endif close(unit_num) - do p = 250,4000,250 + do p = 250,1500,250 ! C(m,o) = A(m,n).B(n,o) m = p