From 3806d68ba72800e5ac931ea6ac3f59d149eb1323 Mon Sep 17 00:00:00 2001 From: Malcolm Roberts Date: Fri, 11 Oct 2024 13:59:58 -0600 Subject: [PATCH] Merge address sanitizer build to 6.3 --- CHANGELOG.md | 3 +++ CMakeLists.txt | 52 ++++++++++++++++++++++++++++++++----- library/src/rtc_cache.cpp | 8 +++++- library/src/rtc_compile.cpp | 4 ++- 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38e33543..4f62c3ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,9 @@ Documentation for rocFFT is available at * Compilation uses amdclang++ instead of hipcc. * CLI11 replaces Boost Program Options as the command line parser for clients and samples. +* Building with the address sanitizer option sets xnack+ on relevant GPU + architectures and address-sanitizer support is added to runtime-compiled + kernels. ## rocFFT 1.0.30 for ROCm 6.2.4 diff --git a/CMakeLists.txt b/CMakeLists.txt index b8a00b34..16fa58cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,12 +116,6 @@ option(ROCFFT_RUNTIME_COMPILE_DEFAULT "Compile kernels at runtime by default" OF # Set default to OFF since users are not likely to tune option(ROCFFT_BUILD_OFFLINE_TUNER "Build with offline tuner executable rocfft_offline_tuner" OFF) -if(BUILD_ADDRESS_SANITIZER) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -shared-libasan") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -shared-libasan") - add_link_options(-fuse-ld=lld) -endif() - # FOR HANDLING ENABLE/DISABLE OPTIONAL BACKWARD COMPATIBILITY for FILE/FOLDER REORG option(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY "Build with file/folder reorg with backward compatibility enabled" OFF) if(BUILD_FILE_REORG_BACKWARD_COMPATIBILITY AND NOT WIN32) @@ -138,6 +132,52 @@ if( WERROR ) set( WARNING_FLAGS ${WARNING_FLAGS} -Werror ) endif( ) +set(DEFAULT_GPUS + gfx803 + gfx900 + gfx906 + gfx908 + gfx90a + gfx940 + gfx941 + gfx942 + gfx1030 + gfx1100 + gfx1101 + gfx1102 + gfx1151 + gfx1200 + gfx1201) + +if(BUILD_ADDRESS_SANITIZER) + add_compile_options(-fsanitize=address) + add_link_options(-fsanitize=address) + add_link_options(-shared-libasan) + SET(DEFAULT_GPUS + gfx908:xnack+ + gfx90a:xnack+ + gfx940:xnack+ + gfx941:xnack+ + gfx942:xnack+) + add_link_options(-fuse-ld=lld) + set(ROCFFT_KERNEL_CACHE_ENABLE off) + add_compile_definitions(ADDRESS_SANITIZER) +endif() + +# Build only for local GPU architecture +if (BUILD_LOCAL_GPU_TARGET_ONLY) + message(STATUS "Building only for local GPU target") + if (COMMAND rocm_local_targets) + rocm_local_targets(DEFAULT_GPUS) + else() + message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.") + endif() +endif() + + +set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.") +rocm_check_target_ids(AMDGPU_TARGETS TARGETS "${AMDGPU_TARGETS}") + # Use target ID syntax if supported for AMDGPU_TARGETS rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201") diff --git a/library/src/rtc_cache.cpp b/library/src/rtc_cache.cpp index 273ebc35..fdcd8472 100644 --- a/library/src/rtc_cache.cpp +++ b/library/src/rtc_cache.cpp @@ -565,6 +565,11 @@ std::vector RTCCache::cached_compile(const std::string& kernel_na kernel_src_gen_t generate_src, const std::array& generator_sum) { +#ifdef ADDRESS_SANITIZER + // The address sanitizer is reported to work better when we include xnack+, so don't strip this + // from the architecture string when building: + const std::string gpu_arch = gpu_arch_with_flags; +#else // Supplied gpu arch may have extra flags on it // (e.g. gfx90a:sramecc+:xnack-), Strip those from the arch name // since omitting them will generate code that handles either @@ -573,7 +578,8 @@ std::vector RTCCache::cached_compile(const std::string& kernel_na // As of this writing, there are no known performance benefits to // including the flags. If that changes, we may need to be more // selective about which flags to strip. - std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags); + const std::string gpu_arch = gpu_arch_strip_flags(gpu_arch_with_flags); +#endif std::shared_future> result; diff --git a/library/src/rtc_compile.cpp b/library/src/rtc_compile.cpp index 711eafd3..2e831f7f 100644 --- a/library/src/rtc_compile.cpp +++ b/library/src/rtc_compile.cpp @@ -36,11 +36,13 @@ std::vector compile_inprocess(const std::string& kernel_src, const std::st std::string gpu_arch_arg = "--gpu-architecture=" + gpu_arch; std::vector options; - options.reserve(4); options.push_back("-O3"); options.push_back("-std=c++14"); options.push_back(gpu_arch_arg.c_str()); options.push_back("-mcumode"); +#ifdef ADDRESS_SANITIZER + options.push_back("-fsanitize=address"); +#endif auto compileResult = hiprtcCompileProgram(prog, options.size(), options.data()); if(compileResult != HIPRTC_SUCCESS)