Skip to content

Commit bd4cb53

Browse files
committed
Merge branch 'develop' into reduceLoops
2 parents 3150845 + f818cc3 commit bd4cb53

18 files changed

+285
-612
lines changed

RELEASE_NOTES.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,26 @@ in this file.
1212

1313
The format of this file is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1414

15+
## [Unreleased] - Release date YYYY-MM-DD
16+
17+
### Added
18+
- Added CARE\_DEEP\_COPY\_RAW\_PTR configuration option.
19+
20+
### Added
21+
- Added ATOMIC\_SUB, ATOMIC\_LOAD, ATOMIC\_STORE, ATOMIC\_EXCHANGE, and ATOMIC\_CAS macros.
22+
- Added TSAN\_ONLY\_ATOMIC\_\* macros to suppress tsan data race reports. Controlled by CARE\_ENABLE\_TSAN\_ONLY\_ATOMICS configuration option.
23+
24+
### Removed
25+
- Removed Accessor template parameter from host\_device\_ptr.
26+
- Removed NoOpAccessor and RaceConditionAccessor. To locate race conditions, use ThreadSanitizer (TSAN) instead.
27+
- Removed CARE\_ENABLE\_RACE\_DETECTION configuration option.
28+
29+
### Changed
30+
- Renamed host\_device\_ptr::getPointer to host\_device\_ptr::data.
31+
32+
### Fixed
33+
- Replaced calls to chai::ManagedArray::getPointer (previously deprecated and now removed) with calls to chai::ManagedArray::data.
34+
1535
## [Version 0.14.1] - Release date 2024-10-15
1636

1737
### Fixed

cmake/SetupOptions.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ option(CARE_ENABLE_IMPLICIT_CONVERSIONS "Enable implicit conversions to-from raw
1919
# CHAI must also be configured with the same settings for implicit conversions.
2020
set(CHAI_ENABLE_IMPLICIT_CONVERSIONS ${CARE_ENABLE_IMPLICIT_CONVERSIONS} CACHE BOOL "Enable implicit conversions to-from raw pointers")
2121
option(CARE_LEGACY_COMPATIBILITY_MODE "Enable legacy compatibility mode" OFF)
22+
option(CARE_DEEP_COPY_RAW_PTR "Use deep copy for managed array initialization from raw pointer" OFF)
2223
option(CARE_ENABLE_MANAGED_PTR "Enable managed_ptr aliases, tests, and reproducer" ON)
2324
option(CARE_DISABLE_RAJAPLUGIN "Disable use of the RAJA plugin. WILL ALSO DISABLE MEMORY MOTION." OFF)
2425
option(CARE_ENABLE_EXTERN_INSTANTIATE "Enable extern instantiation of template functions" OFF)
@@ -32,6 +33,8 @@ option(CARE_NEVER_USE_RAJA_PARALLEL_SCAN "Disable RAJA parallel scans in SCAN lo
3233
option(CARE_ENABLE_FUSER_BIN_32 "Enable the 32 register fusible loop bin." OFF)
3334
option(CARE_ENABLE_PARALLEL_LOOP_BACKWARDS "Reverse the start and end for parallel loops." OFF)
3435
option(CARE_ENABLE_STALE_DATA_CHECK "Enable checking for stale host data. Only applicable for GPU (or GPU simulation) builds." OFF)
36+
# TODO: Investigate correctness and performance impact of this option
37+
option(CARE_ENABLE_TSAN_ONLY_ATOMICS "Enable atomics for ThreadSanitizer (TSAN) build." OFF)
3538

3639
# Extra components
3740
cmake_dependent_option(CARE_ENABLE_TESTS "Build CARE tests"

src/care/Accessor.h

Lines changed: 0 additions & 188 deletions
This file was deleted.

src/care/CHAIDataGetter.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ class CHAIDataGetter {
2828
T * getRawArrayData(chai::ManagedArray<T> data) {
2929
data.move(chai::CPU);
3030
data.registerTouch(chai::CPU);
31-
return (T*)data.getPointer(chai::CPU);
31+
return (T*)data.data(chai::CPU);
3232
}
3333

3434
const T * getConstRawArrayData(chai::ManagedArray<T> data) {
3535
data.move(chai::CPU);
36-
return (const T*)data.getPointer(chai::CPU);
36+
return (const T*)data.data(chai::CPU);
3737
}
3838

3939
static const auto ChaiPolicy = chai::CPU;
@@ -49,12 +49,12 @@ class CHAIDataGetter<T, RAJADeviceExec> {
4949
T * getRawArrayData(chai::ManagedArray<T> data) {
5050
data.move(chai::GPU);
5151
data.registerTouch(chai::GPU);
52-
return (T*)data.getPointer(chai::GPU);
52+
return (T*)data.data(chai::GPU);
5353
}
5454

5555
const T * getConstRawArrayData(chai::ManagedArray<T> data) {
5656
data.move(chai::GPU);
57-
return (const T*)data.getPointer(chai::GPU);
57+
return (const T*)data.data(chai::GPU);
5858
}
5959

6060
static const auto ChaiPolicy = chai::GPU;
@@ -70,12 +70,12 @@ class CHAIDataGetter<globalID, RAJADeviceExec> {
7070
GIDTYPE * getRawArrayData(chai::ManagedArray<globalID> data) {
7171
data.move(chai::GPU);
7272
data.registerTouch(chai::GPU);
73-
return (GIDTYPE*)data.getPointer(chai::GPU);
73+
return (GIDTYPE*)data.data(chai::GPU);
7474
}
7575

7676
const GIDTYPE * getConstRawArrayData(chai::ManagedArray<globalID> data) {
7777
data.move(chai::GPU);
78-
return (GIDTYPE*)data.getPointer(chai::GPU);
78+
return (GIDTYPE*)data.data(chai::GPU);
7979
}
8080

8181
static const auto ChaiPolicy = chai::GPU;
@@ -95,12 +95,12 @@ class CHAIDataGetter<globalID, RAJA::seq_exec> {
9595
GIDTYPE * getRawArrayData(chai::ManagedArray<globalID> data) {
9696
data.move(chai::CPU);
9797
data.registerTouch(chai::CPU);
98-
return (GIDTYPE*)data.getPointer(chai::CPU);
98+
return (GIDTYPE*)data.data(chai::CPU);
9999
}
100100

101101
const GIDTYPE * getConstRawArrayData(chai::ManagedArray<globalID> data) {
102102
data.move(chai::CPU);
103-
return (GIDTYPE*)data.getPointer(chai::CPU);
103+
return (GIDTYPE*)data.data(chai::CPU);
104104
}
105105

106106
static const auto ChaiPolicy = chai::CPU;

src/care/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ configure_file(
1717

1818
set(care_headers
1919
${PROJECT_BINARY_DIR}/include/care/config.h
20-
Accessor.h
2120
algorithm.h
2221
algorithm_decl.h
2322
algorithm_impl.h

src/care/DebugPlugin.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ namespace care{
6868
it.second(space, PluginData::getFileName(), PluginData::getLineNumber());
6969
}
7070
PluginData::clear_post_parallel_forall_actions();
71-
PluginData::s_threadID = -1;
7271
}
7372
#endif // !defined(CHAI_DISABLE_RM)
7473
}

src/care/DefaultMacros.h

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,6 @@
3434
/// Used to capture variables by reference into a lambda (combine with FOR_EACH)
3535
#define CARE_REF_CAPTURE(X) , &X
3636

37-
#ifdef CARE_ENABLE_RACE_DETECTION
38-
#define CARE_SET_THREAD_ID(INDEX) care::DebugPlugin::s_threadID = INDEX ;
39-
#else
40-
#define CARE_SET_THREAD_ID(INDEX)
41-
#endif
42-
43-
4437

4538

4639

@@ -732,8 +725,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _
732725
decltype(_care_checked_loop_end) _care_checked_loop_begin = START_INDEX; \
733726
if (_care_checked_loop_end > _care_checked_loop_begin) { \
734727
CARE_NEST_BEGIN(CHECK) \
735-
care::forall(POLICY{}, __FILE__, __LINE__, _care_checked_loop_begin, _care_checked_loop_end, 0, [=] CARE_DEVICE (decltype(_care_checked_loop_end) INDEX) { \
736-
CARE_SET_THREAD_ID(INDEX)
728+
care::forall(POLICY{}, __FILE__, __LINE__, _care_checked_loop_begin, _care_checked_loop_end, 0, [=] CARE_DEVICE (decltype(_care_checked_loop_end) INDEX) {
737729

738730
#define CARE_CHECKED_POLICY_LOOP_END(CHECK) }); \
739731
CARE_NEST_END(CHECK) }}
@@ -768,8 +760,7 @@ OMP_FOR_BEGIN for (auto INDEX = _care_openmp_for_loop_chunk_begin_ndx; INDEX < _
768760
decltype(_care_checked_loop_end) _care_checked_loop_begin = START_INDEX; \
769761
if (_care_checked_loop_end > _care_checked_loop_begin) { \
770762
CARE_NEST_BEGIN(CHECK) \
771-
care::forall(POLICY{}, __FILE__, __LINE__, _care_checked_loop_begin, _care_checked_loop_end, CHUNK_SIZE, [=] CARE_DEVICE (decltype(_care_checked_loop_end) INDEX) { \
772-
CARE_SET_THREAD_ID(INDEX)
763+
care::forall(POLICY{}, __FILE__, __LINE__, _care_checked_loop_begin, _care_checked_loop_end, CHUNK_SIZE, [=] CARE_DEVICE (decltype(_care_checked_loop_end) INDEX) {
773764

774765
#define CARE_CHECKED_CHUNKED_POLICY_LOOP_END(CHECK) }); \
775766
CARE_NEST_END(CHECK) }}

src/care/PluginData.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ namespace care{
1313
CARE_DLL_API bool PluginData::s_parallel_context = false;
1414
CARE_DLL_API ActionMap PluginData::s_post_parallel_forall_actions = ActionMap{};
1515
CARE_DLL_API std::vector<const chai::PointerRecord*> PluginData::s_active_pointers_in_loop = std::vector<const chai::PointerRecord*>{};
16-
CARE_DLL_API int PluginData::s_threadID = -1;
1716

1817
void PluginData::setFileName(const char * name) {PluginData::s_file_name = name;}
1918

src/care/PluginData.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace care{
2222
using ActionMap = std::unordered_map<void *, std::function<void(chai::ExecutionSpace, const char *, int)>>;
2323

2424
//class for shared plugin functions and variables
25-
CARE_DLL_API class PluginData {
25+
class CARE_DLL_API PluginData {
2626
public:
2727
PluginData() = default;
2828

@@ -54,8 +54,6 @@ namespace care{
5454

5555
static void clearActivePointers();
5656

57-
static int s_threadID;
58-
5957
private:
6058
static const char * s_file_name;
6159

src/care/SortFuser.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,8 @@ namespace care {
299299
// set up a 2D kernel; put per-array metadata in pinned memory to eliminate cudaMemcpy calls for the smaller dimension of data
300300
host_device_ptr<int> lengths(chai::ManagedArray<int>(m_num_arrays, chai::ZERO_COPY));
301301
host_device_ptr<host_device_ptr<int> > out_arrays(chai::ManagedArray<host_device_ptr<int>>(m_num_arrays, chai::ZERO_COPY));
302-
host_ptr<int> pinned_lengths = lengths.getPointer(care::ZERO_COPY, false);
303-
host_ptr<host_device_ptr<int>> pinned_out_arrays = out_arrays.getPointer(care::ZERO_COPY, false);
302+
host_ptr<int> pinned_lengths = lengths.data(care::ZERO_COPY, false);
303+
host_ptr<host_device_ptr<int>> pinned_out_arrays = out_arrays.data(care::ZERO_COPY, false);
304304
// initialize lengths, maxLength, and the array of arrays for the 2D kernel
305305
int maxLength = 0;
306306
for (int a = 0; a < m_num_arrays; ++a ) {
@@ -316,7 +316,7 @@ namespace care {
316316
}
317317
// subtract out the offset, copy the result into individual arrays
318318
// (a device_ptr is used here to satisfy clang-query rules that prohibit capturing a raw pointer)
319-
device_ptr<int> dev_pinned_lengths = lengths.getPointer(ZERO_COPY, false);
319+
device_ptr<int> dev_pinned_lengths = lengths.data(ZERO_COPY, false);
320320
CARE_LOOP_2D_STREAM_JAGGED(i, 0, maxLength, lengths, a, 0, m_num_arrays, iFlattened) {
321321
result[i+out_offsets[a]] -= max_range*a;
322322
out_arrays[a][i] = result[i+out_offsets[a]];

0 commit comments

Comments
 (0)