From 2b0b67979f0d5dafbcc8e674b5d4948148654809 Mon Sep 17 00:00:00 2001 From: Seyed Ali Ghasemi Date: Wed, 31 Jan 2024 10:38:30 +0100 Subject: [PATCH] Remove reduce from do concurrent loops --- fpm.rsp | 4 ++-- src/formatmul.f90 | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fpm.rsp b/fpm.rsp index 594536e..bade393 100644 --- a/fpm.rsp +++ b/fpm.rsp @@ -8,7 +8,7 @@ options --flag "-O3 -mtune=native -xHost -qmkl -qopenmp -ipo" options test options --compiler ifort options --profile release -options --flag "-O3 -mtune=native -xHost -qmkl -qopenmp -ipo -coarray -coarray-num-images=4 -DUSE_COARRAY" +options --flag "-O3 -mtune=native -xHost -qmkl -qopenmp -DUSE_DO_CONCURRENT -ipo -coarray -coarray-num-images=4 -DUSE_COARRAY" @ifx-test options test @@ -32,4 +32,4 @@ options --flag "-O3 -fast -march=native -mtune=native -stdpar=gpu,multicore -lbl options test options --compiler gfortran options --profile release -options --flag "-O3 -march=native -lblas -fopenmp -flto" \ No newline at end of file +options --flag "-O3 -march=native -lblas -fopenmp -DUSE_DO_CONCURRENT -flto" \ No newline at end of file diff --git a/src/formatmul.f90 b/src/formatmul.f90 index 1f8d949..2093d38 100644 --- a/src/formatmul.f90 +++ b/src/formatmul.f90 @@ -324,7 +324,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,1), size(B,2)), source=0.0_rk) call compute_block_ranges(size(B,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(:, se:ee) = & @@ -343,7 +343,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,2), size(B,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(se:ee, :) = & @@ -362,7 +362,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,2), size(B,2)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(se:ee, :) = & @@ -381,7 +381,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,1), size(B,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(:, :) = C(:, :) + & @@ -403,7 +403,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,2), size(B,2)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(se:ee, :) = & @@ -422,7 +422,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,1), size(B,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(:, :) = C(:, :) + & @@ -443,7 +443,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,1), size(B,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(:, :) = C(:, :) + & @@ -462,7 +462,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,2), size(B,2)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(se:ee, :) = & @@ -483,7 +483,7 @@ pure function mat_mat_block_rel(a, b, transA, transB, option, nblock) result(c) allocate(C(size(A,1), size(B,2)), source=0.0_rk) call compute_block_ranges(size(B,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:C) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) C(:, se:ee) = & @@ -521,7 +521,7 @@ pure function mat_vec_block_rel(A, v, transA, option, nblock) result(w) allocate(w(size(A,2)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:w) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) w(se:ee) = & @@ -540,7 +540,7 @@ pure function mat_vec_block_rel(A, v, transA, option, nblock) result(w) allocate(w(size(A,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:w) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) w(:) = & @@ -560,7 +560,7 @@ pure function mat_vec_block_rel(A, v, transA, option, nblock) result(w) allocate(w(size(A,1)), source=0.0_rk) call compute_block_ranges(size(A,2), nblock, block_size, start_elem, end_elem) #if defined(USE_DO_CONCURRENT) - do concurrent (ib = 1: nblock) reduce(+:w) + do concurrent (ib = 1: nblock) se = start_elem(ib) ee = end_elem(ib) w(:) = &