From e047204465e75a62904f95152732c30026534111 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 10 Mar 2023 09:03:34 +0000 Subject: [PATCH] SWDEV-380035 - Check for agent and ptr match for hsa LOCKED ptr Also do not create Arena Memobj for pinned memory Change-Id: Ibecfe90c62cfa252e3da45408041f3d1cb3acbbb --- device/rocm/rocdevice.cpp | 32 ++++++++++++++++++++------------ device/rocm/rocdevice.hpp | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/device/rocm/rocdevice.cpp b/device/rocm/rocdevice.cpp index 34395a24..69d5ff05 100644 --- a/device/rocm/rocdevice.cpp +++ b/device/rocm/rocdevice.cpp @@ -3221,7 +3221,9 @@ device::Signal* Device::createSignal() const { amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset, size_t size) { // Only create arena_mem_object if CPU memory is accessible from HMM // or if runtime received an interop from another ROCr's client - if (!info_.hmmCpuMemoryAccessible_ && !IsValidAllocation(ptr, size)) { + hsa_amd_pointer_info_t ptr_info = {}; + ptr_info.size = sizeof(hsa_amd_pointer_info_t); + if (!info_.hmmCpuMemoryAccessible_ && !IsValidAllocation(ptr, size, &ptr_info)) { return nullptr; } @@ -3238,8 +3240,9 @@ amd::Memory* Device::GetArenaMemObj(const void* ptr, size_t& offset, size_t size } // Calculate the offset of the pointer. - const void* dev_ptr = reinterpret_cast(arena_mem_obj_->getDeviceMemory( - *arena_mem_obj_->getContext().devices()[0])->virtualAddress()); + const void* dev_ptr = reinterpret_cast( + arena_mem_obj_->getDeviceMemory(*arena_mem_obj_->getContext().devices()[0]) + ->virtualAddress()); offset = reinterpret_cast(ptr) - reinterpret_cast(dev_ptr); return arena_mem_obj_; @@ -3253,20 +3256,25 @@ void Device::ReleaseGlobalSignal(void* signal) const { } // ================================================================================================ -bool Device::IsValidAllocation(const void* dev_ptr, size_t size) const { - hsa_amd_pointer_info_t ptr_info = {}; - ptr_info.size = sizeof(hsa_amd_pointer_info_t); +bool Device::IsValidAllocation(const void* dev_ptr, size_t size, hsa_amd_pointer_info_t* ptr_info) { // Query ptr type to see if it's a HMM allocation - hsa_status_t status = hsa_amd_pointer_info( - const_cast(dev_ptr), &ptr_info, nullptr, nullptr, nullptr); + hsa_status_t status = + hsa_amd_pointer_info(const_cast(dev_ptr), ptr_info, nullptr, nullptr, nullptr); // The call should never fail in ROCR, but just check for an error and continue if (status != HSA_STATUS_SUCCESS) { LogError("hsa_amd_pointer_info() failed"); } - // Check if it's a legacy non-HMM allocation in ROCr - if (ptr_info.type != HSA_EXT_POINTER_TYPE_UNKNOWN) { - if ((size != 0) && ((reinterpret_cast(dev_ptr) - - reinterpret_cast(ptr_info.agentBaseAddress)) > size)) { + + // Return false for pinned memory. A true return may result in a race because + // ROCclr may attempt to do a pin/copy/unpin underneath in a multithreaded environment + if (ptr_info->type == HSA_EXT_POINTER_TYPE_LOCKED) { + return false; + } + + if (ptr_info->type != HSA_EXT_POINTER_TYPE_UNKNOWN) { + if ((size != 0) && + ((reinterpret_cast(dev_ptr) - + reinterpret_cast(ptr_info->agentBaseAddress)) > size)) { return false; } return true; diff --git a/device/rocm/rocdevice.hpp b/device/rocm/rocdevice.hpp index 9619abe6..43898c8e 100644 --- a/device/rocm/rocdevice.hpp +++ b/device/rocm/rocdevice.hpp @@ -551,7 +551,7 @@ class Device : public NullDevice { const bool isFineGrainSupported() const; //! Returns True if memory pointer is known to ROCr (excludes HMM allocations) - bool IsValidAllocation(const void* dev_ptr, size_t size) const; + bool IsValidAllocation(const void* dev_ptr, size_t size, hsa_amd_pointer_info_t* ptr_info); //! Allocates hidden heap for device memory allocations void HiddenHeapAlloc();