From 29c979578615e24be20c293175ae4b1b5c1b1347 Mon Sep 17 00:00:00 2001 From: "Benjamin T. Liu" Date: Fri, 22 Nov 2024 13:47:57 -0800 Subject: [PATCH] Use gpu_reduce instead of parallel_reduce --- src/care/DefaultMacros.h | 6 +++--- src/care/forall.h | 6 +++--- src/care/policies.h | 4 ++-- src/care/scan.h | 16 ++++++++-------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/care/DefaultMacros.h b/src/care/DefaultMacros.h index ef1dd566..0066cf1b 100644 --- a/src/care/DefaultMacros.h +++ b/src/care/DefaultMacros.h @@ -736,7 +736,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ #define CARE_CHECKED_PARALLEL_LOOP_END(CHECK) CARE_CHECKED_POLICY_LOOP_END(CHECK) #define CARE_CHECKED_REDUCE_LOOP_START(INDEX, START_INDEX, END_INDEX, CHECK) \ - CARE_CHECKED_POLICY_LOOP_START(care::parallel_reduce,INDEX, START_INDEX, END_INDEX, CHECK) + CARE_CHECKED_POLICY_LOOP_START(care::gpu_reduce,INDEX, START_INDEX, END_INDEX, CHECK) #define CARE_CHECKED_REDUCE_LOOP_END(CHECK) CARE_CHECKED_POLICY_LOOP_END(CHECK) @@ -771,7 +771,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ #define CARE_CHECKED_CHUNKED_PARALLEL_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_POLICY_LOOP_END(CHECK) #define CARE_CHECKED_CHUNKED_REDUCE_LOOP_START(INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) \ - CARE_CHECKED_CHUNKED_POLICY_LOOP_START(care::parallel_reduce,INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) + CARE_CHECKED_CHUNKED_POLICY_LOOP_START(care::gpu_reduce,INDEX, START_INDEX, END_INDEX, CHUNK_SIZE, CHECK) #define CARE_CHECKED_CHUNKED_REDUCE_LOOP_END(CHECK) CARE_CHECKED_CHUNKED_POLICY_LOOP_END(CHECK) @@ -1279,7 +1279,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _ #define CARE_LOOP_2D_STREAM_JAGGED_END }); #define CARE_LOOP_2D_REDUCE_JAGGED(XINDEX, XSTART, XEND, XLENGTHS, YINDEX, YSTART, YLENGTH, FLAT_INDEX) \ - launch_2D_jagged(care::parallel_reduce{}, XSTART, XEND, XLENGTHS.data(chai::DEFAULT, true), YSTART, YLENGTH, __FILE__, __LINE__, [=] CARE_DEVICE (int XINDEX, int YINDEX)->void { + launch_2D_jagged(care::gpu_reduce{}, XSTART, XEND, XLENGTHS.data(chai::DEFAULT, true), YSTART, YLENGTH, __FILE__, __LINE__, [=] CARE_DEVICE (int XINDEX, int YINDEX)->void { #define CARE_LOOP_2D_REDUCE_JAGGED_END }); #endif // !defined(_CARE_DEFAULT_MACROS_H_) diff --git a/src/care/forall.h b/src/care/forall.h index e4a2f618..fb60ae47 100644 --- a/src/care/forall.h +++ b/src/care/forall.h @@ -265,7 +265,7 @@ namespace care { /// /// @brief Execute using the care::RAJAReductionExec policy /// - /// @arg[in] parallel_reduce Used to choose this overload of forall + /// @arg[in] gpu_reduce Used to choose this overload of forall /// @arg[in] fileName The name of the file where this function is called /// @arg[in] lineNumber The line number in the file where this function is called /// @arg[in] start The starting index (inclusive) @@ -275,7 +275,7 @@ namespace care { /// //////////////////////////////////////////////////////////////////////////////// template - void forall(parallel_reduce, const char * fileName, const int lineNumber, + void forall(gpu_reduce, const char * fileName, const int lineNumber, const int start, const int end, const int batch_size, LB&& body) { #if CARE_ENABLE_PARALLEL_LOOP_BACKWARDS s_reverseLoopOrder = true; @@ -629,7 +629,7 @@ namespace care { } template - void launch_2D_jagged(care::parallel_reduce, int xstart, int xend, int const * gpu_lengths, int ystart, int ylength, const char * fileName, int lineNumber , LB && body) { + void launch_2D_jagged(care::gpu_reduce, int xstart, int xend, int const * gpu_lengths, int ystart, int ylength, const char * fileName, int lineNumber , LB && body) { launch_2D_jagged(care::gpu{}, xstart, xend, gpu_lengths, ystart, ylength, fileName, lineNumber, body) ; } #endif diff --git a/src/care/policies.h b/src/care/policies.h index b134c5b0..4eac22e3 100644 --- a/src/care/policies.h +++ b/src/care/policies.h @@ -14,8 +14,8 @@ namespace care { struct sequential {}; struct openmp {}; struct gpu {}; + struct gpu_reduce {}; struct parallel {}; - struct parallel_reduce {}; struct raja_fusible {}; struct raja_fusible_seq {}; struct managed_ptr_read {}; @@ -27,8 +27,8 @@ namespace care { sequential, openmp, gpu, + gpu_reduce, parallel, - parallel_reduce, managed_ptr_read, managed_ptr_write }; diff --git a/src/care/scan.h b/src/care/scan.h index d054f0fd..0d5e9d3b 100644 --- a/src/care/scan.h +++ b/src/care/scan.h @@ -345,15 +345,15 @@ using ScanVarGID = chai::ManagedArray; #define SCAN_REDUCE_LOOP_INIT(INDX, START, END, SCANVAR, SCANVARLENGTH, SCANVAR_OFFSET, EXPR) \ if (END - START > 0) { \ int const SCANVARENDNAME(SCANVAR) = END; \ - CARE_CHECKED_REDUCE_LOOP_START(INDX, START, END+1, scan_loop_init_check) { \ + CARE_CHECKED_REDUCE_LOOP_START(INDX, START, END+1, scan_reduce_loop_init_check) { \ SCANVAR[INDX-START] = (INDX != SCANVARENDNAME(SCANVAR)) && (EXPR) ; \ - } CARE_CHECKED_REDUCE_LOOP_END(scan_loop_init_check) \ + } CARE_CHECKED_REDUCE_LOOP_END(scan_reduce_loop_init_check) \ care::exclusive_scan(RAJAExec{}, SCANVAR, nullptr, END-START+1, SCANVAR_OFFSET, true); \ } else { \ - CARE_CHECKED_SEQUENTIAL_LOOP_START(INDX, 0, 1, scan_loop_init_check) { \ + CARE_CHECKED_SEQUENTIAL_LOOP_START(INDX, 0, 1, scan_reduce_loop_init_check) { \ SCANVAR[INDX] = SCANVAR_OFFSET; \ SCANVARLENGTH[0] = SCANVAR_OFFSET; \ - } CARE_CHECKED_SEQUENTIAL_LOOP_END(scan_loop_init_check) \ + } CARE_CHECKED_SEQUENTIAL_LOOP_END(scan_reduce_loop_init_check) \ } #if CARE_HAVE_LLNL_GLOBALID @@ -375,15 +375,15 @@ using ScanVarGID = chai::ManagedArray; #define SCAN_REDUCE_LOOP_GID_INIT(INDX, START, END, SCANVAR, SCANVARLENGTH, SCANVAR_OFFSET, EXPR) \ if (END - START > 0) { \ int const SCANVARENDNAME(SCANVAR) = END; \ - CARE_CHECKED_REDUCE_LOOP_START(INDX, START, END+1, scan_loop_gid_init_check) { \ + CARE_CHECKED_REDUCE_LOOP_START(INDX, START, END+1, scan_reduce_loop_gid_init_check) { \ SCANVAR[INDX-START] = (INDX != SCANVARENDNAME(SCANVAR)) && (EXPR) ; \ - } CARE_CHECKED_REDUCE_LOOP_END(scan_loop_gid_init_check) \ + } CARE_CHECKED_REDUCE_LOOP_END(scan_reduce_loop_gid_init_check) \ care::exclusive_scan(RAJAExec{}, SCANVAR, nullptr, END-START+1, SCANVAR_OFFSET.Value(), true); \ } else { \ - CARE_CHECKED_SEQUENTIAL_LOOP_START(INDX, 0, 1, scan_loop_gid_init_check) { \ + CARE_CHECKED_SEQUENTIAL_LOOP_START(INDX, 0, 1, scan_reduce_loop_gid_init_check) { \ SCANVAR[INDX] = SCANVAR_OFFSET.Value(); \ SCANVARLENGTH[0] = SCANVAR_OFFSET.Value(); \ - } CARE_CHECKED_SEQUENTIAL_LOOP_END(scan_loop_gid_init_check) \ + } CARE_CHECKED_SEQUENTIAL_LOOP_END(scan_reduce_loop_gid_init_check) \ } #endif // CARE_HAVE_LLNL_GLOBALID