Skip to content

Commit

Permalink
[CUDA][LIBCLC] Fix fence impl to satisfy SYCL 2020. (#12713)
Browse files Browse the repository at this point in the history
- Make the relaxed fence a no-op to satisfy the SYCL spec.
- Make acquire/release/acq_rel use the lighter acq_rel fence on sm_70 and above
instead of the seq_cst fence.

---------

Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
  • Loading branch information
JackAKirk authored Feb 27, 2024
1 parent 9edd27a commit 95e183e
Showing 1 changed file with 28 additions and 6 deletions.
34 changes: 28 additions & 6 deletions libclc/ptx-nvidiacl/libspirv/synchronization/barrier.cl
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,36 @@
#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Forward declaration only; the definition is not visible in this part of the
// file. __spirv_MemoryBarrier compares the returned value against 700 to tell
// pre-sm_70 targets apart from sm_70 and above.
int __clc_nvvm_reflect_arch();

// Emits a memory fence for the requested scope and ordering.
//
// memory:    scope selector. CrossDevice maps to the system-wide fence,
//            Device to the GPU-wide fence, and every other value to the
//            CTA-wide fence.
// semantics: memory-semantics mask. Only the low five bits (0x1F) are
//            inspected; per the SPIR-V Memory Semantics encoding these carry
//            the ordering flags (None = 0, Acquire, Release, AcquireRelease,
//            SequentiallyConsistent).
//
// Dispatch:
//   - arch < 700, or SequentiallyConsistent on any arch: membar.{sys,gl,cta}.
//     Note that below sm_70 this path is taken for every ordering, including
//     None, because fence.acq_rel is not available there.
//   - arch >= 700 with any other non-None ordering: fence.acq_rel.{sys,gpu,cta}.
//   - arch >= 700 with None (relaxed): no instruction is emitted.
_CLC_OVERLOAD _CLC_DEF void __spirv_MemoryBarrier(unsigned int memory,
                                                  unsigned int semantics) {

  // for sm_70 and above membar becomes semantically identical to fence.sc.
  // However sm_70 and above also introduces a lightweight fence.acq_rel that
  // can be used to form either acquire or release strong operations.
  // Consult
  // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar-fence
  // for details.

  // Strip everything but the ordering bits from the semantics mask.
  unsigned int order = semantics & 0x1F;
  if (__clc_nvvm_reflect_arch() < 700 ||
      order == SequentiallyConsistent) {
    // Full-strength fence: the only option before sm_70, and the required
    // one for sequentially consistent ordering.
    if (memory == CrossDevice) {
      __nvvm_membar_sys();
    } else if (memory == Device) {
      __nvvm_membar_gl();
    } else {
      __nvvm_membar_cta();
    }
  } else if (order != None) {
    // Lighter acquire/release fence. The "memory" clobber tells the compiler
    // that the statement orders memory, so it must not move loads or stores
    // across it or keep memory values cached in registers over it.
    if (memory == CrossDevice) {
      __asm__ __volatile__("fence.acq_rel.sys;" ::: "memory");
    } else if (memory == Device) {
      __asm__ __volatile__("fence.acq_rel.gpu;" ::: "memory");
    } else {
      __asm__ __volatile__("fence.acq_rel.cta;" ::: "memory");
    }
  }
  // order == None on sm_70 and above: a relaxed fence is intentionally a no-op.
}

Expand Down

0 comments on commit 95e183e

Please sign in to comment.