From 131d6644b9bfa98e6d2fd8a6524a6c9253bbde78 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 2 Jan 2025 15:16:30 +0100 Subject: [PATCH 01/15] wip --- .../layers/sanitizer/asan/asan_shadow.cpp | 8 +- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 186 +++++++++++++ .../sanitizer/msan/msan_interceptor.hpp | 1 + .../layers/sanitizer/msan/msan_shadow.cpp | 255 ++++++++++++------ .../layers/sanitizer/msan/msan_shadow.hpp | 47 +++- .../sanitizer_common/sanitizer_utils.cpp | 11 +- .../sanitizer_common/sanitizer_utils.hpp | 2 +- 7 files changed, 411 insertions(+), 99 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 145fd232c1..2e5b0db0cc 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -223,7 +223,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, + URes = EnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("EnqueueUSMBlockingSet(): {}", @@ -236,7 +236,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } } - auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, + auto URes = EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); getContext()->logger.debug( "EnqueuePoisonShadow (addr={}, count={}, value={}): {}", @@ -272,7 +272,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( + ur_result_t URes = EnqueueUSMSet( Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( @@ -312,7 +312,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( + ur_result_t URes = EnqueueUSMSet( Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 87438a1f99..6476128161 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -110,6 +110,17 @@ ur_result_t urUSMDeviceAlloc( pool, size, ppMem); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMFree +__urdlllocal ur_result_t UR_APICALL urUSMFree( + ur_context_handle_t hContext, ///< [in] handle of the context object + void *pMem ///< [in] pointer to USM memory object +) { + getContext()->logger.debug("==== urUSMFree"); + + return getMsanInterceptor()->releaseMemory(hContext, pMem); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urProgramCreateWithIL ur_result_t urProgramCreateWithIL( @@ -1271,6 +1282,176 @@ ur_result_t urKernelSetArgMemObj( return UR_RESULT_SUCCESS; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFill +ur_result_t UR_APICALL urEnqueueUSMFill( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + void *pMem, ///< [in][bounds(0, size)] pointer to USM memory object + size_t + patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less + ///< than or equal to width. + const void + *pPattern, ///< [in] pointer with the bytes of the pattern to set. + size_t + size, ///< [in] size in bytes to be set. Must be a multiple of patternSize. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. +) { + auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; + + getContext()->logger.debug("==== urEnqueueUSMFill"); + + auto Mem = (uptr)pMem; + auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); + if (MemInfoItOp) { + auto MemInfo = (*MemInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(MemInfo->Device); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + hQueue, Mem, size, 0, numEventsInWaitList, phEventWaitList, + phEvent)); + } + + return pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMMemcpy +ur_result_t UR_APICALL urEnqueueUSMMemcpy( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + bool blocking, ///< [in] blocking or non-blocking copy + void * + pDst, ///< [in][bounds(0, size)] pointer to the destination USM memory object + const void * + pSrc, ///< [in][bounds(0, size)] pointer to the source USM memory object + size_t size, ///< [in] size in bytes to be copied + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. +) { + auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; + getContext()->logger.debug("==== pfnUSMMemcpy"); + + auto Src = (uptr)pSrc, Dst = (uptr)pDst; + auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); + auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); + + if (SrcInfoItOp && DstInfoItOp) { + auto SrcInfo = (*SrcInfoItOp)->second; + auto DstInfo = (*DstInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); + UR_CALL(DeviceInfo->Shadow->EnqueueCopyShadow( + hQueue, blocking, Dst, Src, size, numEventsInWaitList, + phEventWaitList, phEvent)); + } else if (DstInfoItOp) { + auto DstInfo = (*DstInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(DstInfo->Device); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + hQueue, Dst, size, 0, numEventsInWaitList, phEventWaitList, + phEvent)); + } + + return pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, numEventsInWaitList, + phEventWaitList, phEvent); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFill2D +ur_result_t UR_APICALL urEnqueueUSMFill2D( + ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. + void * + pMem, ///< [in][bounds(0, pitch * height)] pointer to memory to be filled. + size_t + pitch, ///< [in] the total width of the destination memory including padding. + size_t + patternSize, ///< [in] the size in bytes of the pattern. Must be a power of 2 and less + ///< than or equal to width. + const void + *pPattern, ///< [in] pointer with the bytes of the pattern to set. + size_t + width, ///< [in] the width in bytes of each row to fill. Must be a multiple of + ///< patternSize. + size_t height, ///< [in] the height of the columns to fill. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. If phEventWaitList and phEvent are not + ///< NULL, phEvent must not refer to an element of the phEventWaitList array. +) { + auto pfnUSMFill2D = getContext()->urDdiTable.Enqueue.pfnUSMFill2D; + getContext()->logger.debug("==== urEnqueueUSMFill2D"); + + auto Mem = (uptr)pMem; + auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); + if (MemInfoItOp) { + auto MemInfo = (*MemInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(MemInfo->Device); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + hQueue, Mem, width * height, 0, numEventsInWaitList, + phEventWaitList, phEvent)); + } + + return pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, + height, numEventsInWaitList, phEventWaitList, phEvent); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMMemcpy2D +ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( + ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. + bool blocking, ///< [in] indicates if this operation should block the host. + void * + pDst, ///< [in][bounds(0, dstPitch * height)] pointer to memory where data will + ///< be copied. + size_t + dstPitch, ///< [in] the total width of the source memory including padding. + const void * + pSrc, ///< [in][bounds(0, srcPitch * height)] pointer to memory to be copied. + size_t + srcPitch, ///< [in] the total width of the source memory including padding. + size_t width, ///< [in] the width in bytes of each row to be copied. + size_t height, ///< [in] the height of columns to be copied. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. If phEventWaitList and phEvent are not + ///< NULL, phEvent must not refer to an element of the phEventWaitList array. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -1429,6 +1610,10 @@ ur_result_t urGetEnqueueProcAddrTable( pDdiTable->pfnMemUnmap = ur_sanitizer_layer::msan::urEnqueueMemUnmap; pDdiTable->pfnKernelLaunch = ur_sanitizer_layer::msan::urEnqueueKernelLaunch; + pDdiTable->pfnUSMFill = ur_sanitizer_layer::msan::urEnqueueUSMFill; + pDdiTable->pfnUSMMemcpy = ur_sanitizer_layer::msan::urEnqueueUSMMemcpy; + pDdiTable->pfnUSMFill2D = ur_sanitizer_layer::msan::urEnqueueUSMFill2D; + pDdiTable->pfnUSMMemcpy2D = ur_sanitizer_layer::msan::urEnqueueUSMMemcpy2D; return result; } @@ -1446,6 +1631,7 @@ ur_result_t urGetUSMProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::msan::urUSMDeviceAlloc; + pDdiTable->pfnFree = ur_sanitizer_layer::msan::urUSMFree; return result; } diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 80dbf389a4..940655de98 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -181,6 +181,7 @@ class MsanInterceptor { const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, void **ResultPtr); + ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); ur_result_t registerProgram(ur_program_handle_t Program); ur_result_t unregisterProgram(ur_program_handle_t Program); diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 75866203f3..969e415628 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -111,22 +111,61 @@ uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) { return Ptr ^ CPU_SHADOW_MASK; } -ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t, - uptr Ptr, uptr Size, - u8 Value) { +ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { + + if (Size) { + uptr ShadowBegin = MemToShadow(Ptr); + uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + assert(ShadowBegin <= ShadowEnd); + getContext()->logger.debug( + "EnqueuePoisonShadow(addr={}, count={}, value={})", + (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, + (void *)(size_t)Value); + memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + } + + if (OutEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, NumEvents, EventWaitList, OutEvent)); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanShadowMemoryCPU::EnqueueCopyShadow( + ur_queue_handle_t Queue, bool Blocking, uptr Dst, uptr Src, uptr Size, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { if (Size == 0) { + if (OutEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, NumEvents, EventWaitList, OutEvent)); + } return UR_RESULT_SUCCESS; } - uptr ShadowBegin = MemToShadow(Ptr); - uptr ShadowEnd = MemToShadow(Ptr + Size - 1); - assert(ShadowBegin <= ShadowEnd); - getContext()->logger.debug( - "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin, - ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value); - memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); + uptr SrcShadowBegin = MemToShadow(Src); + uptr SrcShadowEnd = MemToShadow(Src + Size - 1); + assert(SrcShadowBegin <= SrcShadowEnd); - return UR_RESULT_SUCCESS; + uptr DstShadowBegin = MemToShadow(Dst); + uptr DstShadowEnd = MemToShadow(Dst + Size - 1); + assert(DstShadowBegin <= DstShadowEnd); + + assert(SrcShadowEnd - SrcShadowBegin == DstShadowEnd - DstShadowBegin); + + // FIXME: host asan will not support to use this function + auto Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, Blocking, (void *)DstShadowBegin, (void *)SrcShadowBegin, + SrcShadowEnd - SrcShadowBegin + 1, NumEvents, EventWaitList, OutEvent); + + getContext()->logger.debug("EnqueueCopyShadow(dst={}, src={}, size={}): {}", + (void *)DstShadowBegin, (void *)SrcShadowBegin, + (void *)(SrcShadowEnd - SrcShadowBegin + 1), + Result); + + return Result; } ur_result_t MsanShadowMemoryGPU::Setup() { @@ -169,88 +208,144 @@ ur_result_t MsanShadowMemoryGPU::Destory() { return Result; } -ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, - uptr Ptr, uptr Size, - u8 Value) { - if (Size == 0) { - return UR_RESULT_SUCCESS; - } +ur_result_t MsanShadowMemoryGPU::EnqueueMappingShadow( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, + std::vector &EventWaitList, + ur_event_handle_t *OutEvent) { + + ur_physical_mem_properties_t Desc{UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, + nullptr, 0}; + + const size_t PageSize = GetVirtualMemGranularity(Context, Device); - uptr ShadowBegin = MemToShadow(Ptr); - uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + const uptr ShadowBegin = MemToShadow(Ptr); + const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); - { - static const size_t PageSize = - GetVirtualMemGranularity(Context, Device); - - ur_physical_mem_properties_t Desc{ - UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0}; - - // Make sure [Ptr, Ptr + Size] is mapped to physical memory - for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize); - MappedPtr <= ShadowEnd; MappedPtr += PageSize) { - std::scoped_lock Guard(VirtualMemMapsMutex); - if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) { - ur_physical_mem_handle_t PhysicalMem{}; - auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate( - Context, Device, PageSize, &Desc, &PhysicalMem); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("urPhysicalMemCreate(): {}", - URes); - return URes; - } - - URes = getContext()->urDdiTable.VirtualMem.pfnMap( - Context, (void *)MappedPtr, PageSize, PhysicalMem, 0, - UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("urVirtualMemMap({}, {}): {}", - (void *)MappedPtr, PageSize, - URes); - return URes; - } - - getContext()->logger.debug("urVirtualMemMap: {} ~ {}", - (void *)MappedPtr, - (void *)(MappedPtr + PageSize - 1)); - - // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, - PageSize); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("EnqueueUSMBlockingSet(): {}", - URes); - return URes; - } - - VirtualMemMaps[MappedPtr].first = PhysicalMem; + + // Make sure [Ptr, Ptr + Size] is mapped to physical memory + for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize); + MappedPtr <= ShadowEnd; MappedPtr += PageSize) { + std::scoped_lock Guard(VirtualMemMapsMutex); + if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) { + ur_physical_mem_handle_t PhysicalMem{}; + auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate( + Context, Device, PageSize, &Desc, &PhysicalMem); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("urPhysicalMemCreate(): {}", URes); + return URes; } - // We don't need to record virtual memory map for null pointer, - // since it doesn't have an alloc info. - if (Ptr == 0) { - continue; + URes = getContext()->urDdiTable.VirtualMem.pfnMap( + Context, (void *)MappedPtr, PageSize, PhysicalMem, 0, + UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("urVirtualMemMap({}, {}): {}", + (void *)MappedPtr, PageSize, URes); + return URes; } - auto AllocInfoIt = - getMsanInterceptor()->findAllocInfoByAddress(Ptr); - assert(AllocInfoIt); - VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); + getContext()->logger.debug("urVirtualMemMap: {} ~ {}", + (void *)MappedPtr, + (void *)(MappedPtr + PageSize - 1)); + + // Initialize to zero + URes = EnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize, + EventWaitList.size(), EventWaitList.data(), + OutEvent); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("EnqueueUSMSet(): {}", URes); + return URes; + } + + EventWaitList.clear(); + EventWaitList.push_back(*OutEvent); + + VirtualMemMaps[MappedPtr].first = PhysicalMem; } + + // We don't need to record virtual memory map for null pointer, + // since it doesn't have an alloc info. + if (Ptr == 0) { + continue; + } + + auto AllocInfoIt = getMsanInterceptor()->findAllocInfoByAddress(Ptr); + assert(AllocInfoIt); + VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( + ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { + if (Size == 0) { + if (OutEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, NumEvents, EventWaitList, OutEvent)); + } + return UR_RESULT_SUCCESS; } - auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, - ShadowEnd - ShadowBegin + 1); + std::vector Events(EventWaitList, + EventWaitList + NumEvents); + UR_CALL(EnqueueMappingShadow(Queue, Ptr, Size, Events, OutEvent)); + + const uptr ShadowBegin = MemToShadow(Ptr); + const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + assert(ShadowBegin <= ShadowEnd); + + auto Result = EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, + ShadowEnd - ShadowBegin + 1, Events.size(), + Events.data(), OutEvent); + getContext()->logger.debug( - "EnqueuePoisonShadow (addr={}, count={}, value={}): {}", + "EnqueuePoisonShadow(addr={}, count={}, value={}): {}", (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value, - URes); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("EnqueueUSMBlockingSet(): {}", URes); - return URes; + Result); + + return Result; +} + +ur_result_t MsanShadowMemoryGPU::EnqueueCopyShadow( + ur_queue_handle_t Queue, bool Blocking, uptr Dst, uptr Src, uptr Size, + uint32_t NumEvents, const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { + if (Size == 0) { + if (OutEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, NumEvents, EventWaitList, OutEvent)); + } + return UR_RESULT_SUCCESS; } - return UR_RESULT_SUCCESS; + std::vector Events(EventWaitList, + EventWaitList + NumEvents); + UR_CALL(EnqueueMappingShadow(Queue, Src, Size, Events, OutEvent)); + UR_CALL(EnqueueMappingShadow(Queue, Dst, Size, Events, OutEvent)); + + const uptr SrcShadowBegin = MemToShadow(Src); + const uptr SrcShadowEnd = MemToShadow(Src + Size - 1); + assert(SrcShadowBegin <= SrcShadowEnd); + + const uptr DstShadowBegin = MemToShadow(Dst); + const uptr DstShadowEnd = MemToShadow(Dst + Size - 1); + assert(DstShadowBegin <= DstShadowEnd); + + assert(DstShadowEnd - DstShadowBegin == SrcShadowEnd - SrcShadowBegin); + + auto Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, Blocking, (void *)DstShadowBegin, (void *)SrcShadowBegin, + SrcShadowEnd - SrcShadowBegin + 1, Events.size(), Events.data(), + OutEvent); + + getContext()->logger.debug("EnqueueCopyShadow(dst={}, src={}, size={}): {}", + (void *)DstShadowBegin, (void *)SrcShadowBegin, + (void *)(SrcShadowEnd - SrcShadowBegin + 1), + Result); + + return Result; } ur_result_t diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/source/loader/layers/sanitizer/msan/msan_shadow.hpp index de13683cbc..194f27f60d 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -32,8 +32,17 @@ struct MsanShadowMemory { virtual uptr MemToShadow(uptr Ptr) = 0; - virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, - uptr Size, u8 Value) = 0; + virtual ur_result_t + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, + uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) = 0; + + virtual ur_result_t + EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, + uptr Src, uptr Size, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) = 0; virtual ur_result_t ReleaseShadow(std::shared_ptr) { return UR_RESULT_SUCCESS; @@ -74,8 +83,17 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { uptr MemToShadow(uptr Ptr) override; - ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, - uptr Size, u8 Value) override; + ur_result_t + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, + uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override; + + ur_result_t + EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, + uptr Src, uptr Size, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override; }; struct MsanShadowMemoryGPU : public MsanShadowMemory { @@ -85,19 +103,34 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { ur_result_t Setup() override; ur_result_t Destory() override; - ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, - uptr Size, u8 Value) override final; + + ur_result_t + EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value, + uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override final; + + ur_result_t + EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, + uptr Src, uptr Size, uint32_t NumEvents = 0, + const ur_event_handle_t *EventWaitList = nullptr, + ur_event_handle_t *OutEvent = nullptr) override final; ur_result_t ReleaseShadow(std::shared_ptr AI) override final; virtual size_t GetShadowSize() = 0; - ur_mutex VirtualMemMapsMutex; + private: + ur_result_t + EnqueueMappingShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + std::vector &EventWaitList, + ur_event_handle_t *OutEvent); std::unordered_map< uptr, std::pair>>> VirtualMemMaps; + ur_mutex VirtualMemMapsMutex; }; /// Shadow Memory layout of GPU PVC device diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 900eae405b..97bcaa7bc3 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -243,13 +243,10 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, - char Value, size_t Size, uint32_t NumEvents, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { - if (Size == 0) { - return UR_RESULT_SUCCESS; - } +ur_result_t EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value, + size_t Size, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { return getContext()->urDdiTable.Enqueue.pfnUSMFill( Queue, Ptr, 1, &Value, Size, NumEvents, EventWaitList, OutEvent); } diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 6fcb05894e..f5b14c38b0 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -59,7 +59,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); ur_result_t -EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, +EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr); From 0afd6cd05da77e166f00a30deb1e24eaaafe7ab9 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 2 Jan 2025 15:18:03 +0100 Subject: [PATCH 02/15] wip --- source/loader/layers/sanitizer/asan/asan_shadow.cpp | 8 ++++---- .../layers/sanitizer/msan/msan_interceptor.cpp | 13 +++++++++++++ source/loader/layers/sanitizer/msan/msan_shadow.cpp | 4 ++-- .../sanitizer/sanitizer_common/sanitizer_utils.cpp | 2 +- .../sanitizer/sanitizer_common/sanitizer_utils.hpp | 2 +- 5 files changed, 21 insertions(+), 8 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 2e5b0db0cc..145fd232c1 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -223,7 +223,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMSet(Queue, (void *)MappedPtr, 0, + URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("EnqueueUSMBlockingSet(): {}", @@ -236,7 +236,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } } - auto URes = EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, + auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); getContext()->logger.debug( "EnqueuePoisonShadow (addr={}, count={}, value={}): {}", @@ -272,7 +272,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMSet( + ur_result_t URes = EnqueueUSMBlockingSet( Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( @@ -312,7 +312,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMSet( + ur_result_t URes = EnqueueUSMBlockingSet( Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index b9fd9d1ed6..00f35d82e2 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -82,6 +82,19 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, return UR_RESULT_SUCCESS; } +ur_result_t MsanInterceptor::releaseMemory(ur_context_handle_t Context, + void *Ptr) { + auto Addr = reinterpret_cast(Ptr); + auto AddrInfoItOp = findAllocInfoByAddress(Addr); + + if (AddrInfoItOp) { + std::scoped_lock Guard(m_AllocationMapMutex); + m_AllocationMap.erase(*AddrInfoItOp); + } + + return getContext()->urDdiTable.USM.pfnFree(Context, Ptr); +} + ur_result_t MsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, ur_queue_handle_t Queue, USMLaunchInfo &LaunchInfo) { diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 969e415628..d3282f5d63 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -249,7 +249,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMappingShadow( (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize, + URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize, EventWaitList.size(), EventWaitList.data(), OutEvent); if (URes != UR_RESULT_SUCCESS) { @@ -296,7 +296,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); - auto Result = EnqueueUSMSet(Queue, (void *)ShadowBegin, Value, + auto Result = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1, Events.size(), Events.data(), OutEvent); diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 97bcaa7bc3..86c0286c62 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -243,7 +243,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value, +ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index f5b14c38b0..6fcb05894e 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -59,7 +59,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); ur_result_t -EnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value, +EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr); From afdfb91014e635f98b04ea8abd0f1aa1b3a2484b Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Thu, 2 Jan 2025 15:23:09 +0100 Subject: [PATCH 03/15] wip --- source/loader/layers/sanitizer/msan/msan_shadow.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index d3282f5d63..a41332994d 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -116,8 +116,8 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { if (Size) { - uptr ShadowBegin = MemToShadow(Ptr); - uptr ShadowEnd = MemToShadow(Ptr + Size - 1); + const uptr ShadowBegin = MemToShadow(Ptr); + const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); getContext()->logger.debug( "EnqueuePoisonShadow(addr={}, count={}, value={})", @@ -145,12 +145,12 @@ ur_result_t MsanShadowMemoryCPU::EnqueueCopyShadow( return UR_RESULT_SUCCESS; } - uptr SrcShadowBegin = MemToShadow(Src); - uptr SrcShadowEnd = MemToShadow(Src + Size - 1); + const uptr SrcShadowBegin = MemToShadow(Src); + const uptr SrcShadowEnd = MemToShadow(Src + Size - 1); assert(SrcShadowBegin <= SrcShadowEnd); - uptr DstShadowBegin = MemToShadow(Dst); - uptr DstShadowEnd = MemToShadow(Dst + Size - 1); + const uptr DstShadowBegin = MemToShadow(Dst); + const uptr DstShadowEnd = MemToShadow(Dst + Size - 1); assert(DstShadowBegin <= DstShadowEnd); assert(SrcShadowEnd - SrcShadowBegin == DstShadowEnd - DstShadowBegin); From 64ff65b017ba7739f79da548e594db33e4ab7819 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 03:55:25 +0100 Subject: [PATCH 04/15] wip --- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 96 ++++++++++++++----- .../sanitizer/msan/msan_interceptor.cpp | 35 +------ .../sanitizer/msan/msan_interceptor.hpp | 19 ---- .../layers/sanitizer/msan/msan_shadow.cpp | 79 +-------------- .../layers/sanitizer/msan/msan_shadow.hpp | 20 +--- 5 files changed, 79 insertions(+), 170 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 6476128161..3f3227a4e5 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -45,7 +45,6 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, UR_CALL(DI->allocShadowMemory(Context)); } CI->DeviceList.emplace_back(hDevice); - CI->AllocInfosMap[hDevice]; } return UR_RESULT_SUCCESS; } @@ -1306,23 +1305,25 @@ ur_result_t UR_APICALL urEnqueueUSMFill( ///< must not refer to an element of the phEventWaitList array. ) { auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; - getContext()->logger.debug("==== urEnqueueUSMFill"); - auto Mem = (uptr)pMem; + UR_CALL(pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent)); + + const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); if (MemInfoItOp) { auto MemInfo = (*MemInfoItOp)->second; const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(MemInfo->Device); - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( - hQueue, Mem, size, 0, numEventsInWaitList, phEventWaitList, - phEvent)); + const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); + + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, + phEvent, phEvent)); } - return pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, phEvent); + return UR_RESULT_SUCCESS; } /////////////////////////////////////////////////////////////////////////////// @@ -1349,7 +1350,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; getContext()->logger.debug("==== pfnUSMMemcpy"); - auto Src = (uptr)pSrc, Dst = (uptr)pDst; + UR_CALL(pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, + numEventsInWaitList, phEventWaitList, phEvent)); + + const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); @@ -1359,21 +1363,23 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); - UR_CALL(DeviceInfo->Shadow->EnqueueCopyShadow( - hQueue, blocking, Dst, Src, size, numEventsInWaitList, - phEventWaitList, phEvent)); + const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); + const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + + UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, + (void *)SrcShadow, size, 1, phEvent, phEvent)); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(DstInfo->Device); - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( - hQueue, Dst, size, 0, numEventsInWaitList, phEventWaitList, - phEvent)); + auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, + phEvent, phEvent)); } - return pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, numEventsInWaitList, - phEventWaitList, phEvent); + return UR_RESULT_SUCCESS; } /////////////////////////////////////////////////////////////////////////////// @@ -1406,20 +1412,25 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( auto pfnUSMFill2D = getContext()->urDdiTable.Enqueue.pfnUSMFill2D; getContext()->logger.debug("==== urEnqueueUSMFill2D"); - auto Mem = (uptr)pMem; + UR_CALL(pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, + height, numEventsInWaitList, phEventWaitList, + phEvent)); + + const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); if (MemInfoItOp) { auto MemInfo = (*MemInfoItOp)->second; const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(MemInfo->Device); - UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( - hQueue, Mem, width * height, 0, numEventsInWaitList, - phEventWaitList, phEvent)); + const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); + + const char Pattern = 0; + UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, + width, height, 1, phEvent, phEvent)); } - return pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, - height, numEventsInWaitList, phEventWaitList, phEvent); + return UR_RESULT_SUCCESS; } /////////////////////////////////////////////////////////////////////////////// @@ -1448,8 +1459,43 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( ///< kernel execution instance. If phEventWaitList and phEvent are not ///< NULL, phEvent must not refer to an element of the phEventWaitList array. ) { - ur_result_t result = UR_RESULT_SUCCESS; - return result; + auto pfnUSMMemcpy2D = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D; + getContext()->logger.debug("==== pfnUSMMemcpy2D"); + + UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, + width, height, numEventsInWaitList, phEventWaitList, + phEvent)); + + auto Src = (uptr)pSrc, Dst = (uptr)pDst; + auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); + auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); + + if (SrcInfoItOp && DstInfoItOp) { + auto SrcInfo = (*SrcInfoItOp)->second; + auto DstInfo = (*DstInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); + auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); + auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + + UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, + (void *)SrcShadow, srcPitch, width, height, + numEventsInWaitList, phEventWaitList, phEvent)); + } else if (DstInfoItOp) { + auto DstInfo = (*DstInfoItOp)->second; + + const auto &DeviceInfo = + getMsanInterceptor()->getDeviceInfo(DstInfo->Device); + auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + + const char Pattern = 0; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 1, + phEvent, phEvent)); + } + + return UR_RESULT_SUCCESS; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 00f35d82e2..614614ebdd 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -49,8 +49,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, size_t Size, void **ResultPtr) { auto ContextInfo = getContextInfo(Context); - std::shared_ptr DeviceInfo = - Device ? getDeviceInfo(Device) : nullptr; + std::shared_ptr DeviceInfo = getDeviceInfo(Device); void *Allocated = nullptr; @@ -70,15 +69,16 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, AI->print(); - // For updating shadow memory - ContextInfo->insertAllocInfo({Device}, AI); - // For memory release { std::scoped_lock Guard(m_AllocationMapMutex); m_AllocationMap.emplace(AI->AllocBegin, std::move(AI)); } + ManagedQueue Queue(Context, Device); + DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, + AI->AllocSize, 0xff); + return UR_RESULT_SUCCESS; } @@ -111,8 +111,6 @@ ur_result_t MsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, UR_CALL(prepareLaunch(DeviceInfo, InternalQueue, Kernel, LaunchInfo)); - UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue)); - return UR_RESULT_SUCCESS; } @@ -137,29 +135,6 @@ ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, return Result; } -ur_result_t -MsanInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, - ur_queue_handle_t Queue, - std::shared_ptr &AI) { - return DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, - AI->AllocSize, 0xff); -} - -ur_result_t -MsanInterceptor::updateShadowMemory(std::shared_ptr &ContextInfo, - std::shared_ptr &DeviceInfo, - ur_queue_handle_t Queue) { - auto &AllocInfos = ContextInfo->AllocInfosMap[DeviceInfo->Handle]; - std::scoped_lock Guard(AllocInfos.Mutex); - - for (auto &AI : AllocInfos.List) { - UR_CALL(enqueueAllocInfo(DeviceInfo, Queue, AI)); - } - AllocInfos.List.clear(); - - return UR_RESULT_SUCCESS; -} - ur_result_t MsanInterceptor::registerProgram(ur_program_handle_t Program) { ur_result_t Result = UR_RESULT_SUCCESS; diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 940655de98..6c065eb032 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -120,7 +120,6 @@ struct ContextInfo { std::atomic RefCount = 1; std::vector DeviceList; - std::unordered_map AllocInfosMap; explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { [[maybe_unused]] auto Result = @@ -129,15 +128,6 @@ struct ContextInfo { } ~ContextInfo(); - - void insertAllocInfo(const std::vector &Devices, - std::shared_ptr &AI) { - for (auto Device : Devices) { - auto &AllocInfos = AllocInfosMap[Device]; - std::scoped_lock Guard(AllocInfos.Mutex); - AllocInfos.List.emplace_back(AI); - } - } }; struct USMLaunchInfo { @@ -264,15 +254,6 @@ class MsanInterceptor { bool isNormalExit() { return m_NormalExit; } private: - ur_result_t - updateShadowMemory(std::shared_ptr &ContextInfo, - std::shared_ptr &DeviceInfo, - ur_queue_handle_t Queue); - - ur_result_t enqueueAllocInfo(std::shared_ptr &DeviceInfo, - ur_queue_handle_t Queue, - std::shared_ptr &AI); - /// Initialize Global Variables & Kernel Name at first Launch ur_result_t prepareLaunch(std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index a41332994d..8af2335779 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -133,41 +133,6 @@ ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow( return UR_RESULT_SUCCESS; } -ur_result_t MsanShadowMemoryCPU::EnqueueCopyShadow( - ur_queue_handle_t Queue, bool Blocking, uptr Dst, uptr Src, uptr Size, - uint32_t NumEvents, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { - if (Size == 0) { - if (OutEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - Queue, NumEvents, EventWaitList, OutEvent)); - } - return UR_RESULT_SUCCESS; - } - - const uptr SrcShadowBegin = MemToShadow(Src); - const uptr SrcShadowEnd = MemToShadow(Src + Size - 1); - assert(SrcShadowBegin <= SrcShadowEnd); - - const uptr DstShadowBegin = MemToShadow(Dst); - const uptr DstShadowEnd = MemToShadow(Dst + Size - 1); - assert(DstShadowBegin <= DstShadowEnd); - - assert(SrcShadowEnd - SrcShadowBegin == DstShadowEnd - DstShadowBegin); - - // FIXME: host asan will not support to use this function - auto Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, Blocking, (void *)DstShadowBegin, (void *)SrcShadowBegin, - SrcShadowEnd - SrcShadowBegin + 1, NumEvents, EventWaitList, OutEvent); - - getContext()->logger.debug("EnqueueCopyShadow(dst={}, src={}, size={}): {}", - (void *)DstShadowBegin, (void *)SrcShadowBegin, - (void *)(SrcShadowEnd - SrcShadowBegin + 1), - Result); - - return Result; -} - ur_result_t MsanShadowMemoryGPU::Setup() { // Currently, Level-Zero doesn't create independent VAs for each contexts, if we reserve // shadow memory for each contexts, this will cause out-of-resource error when user uses @@ -208,7 +173,7 @@ ur_result_t MsanShadowMemoryGPU::Destory() { return Result; } -ur_result_t MsanShadowMemoryGPU::EnqueueMappingShadow( +ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( ur_queue_handle_t Queue, uptr Ptr, uptr Size, std::vector &EventWaitList, ur_event_handle_t *OutEvent) { @@ -290,7 +255,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( std::vector Events(EventWaitList, EventWaitList + NumEvents); - UR_CALL(EnqueueMappingShadow(Queue, Ptr, Size, Events, OutEvent)); + UR_CALL(EnqueueMapShadow(Queue, Ptr, Size, Events, OutEvent)); const uptr ShadowBegin = MemToShadow(Ptr); const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); @@ -308,46 +273,6 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( return Result; } -ur_result_t MsanShadowMemoryGPU::EnqueueCopyShadow( - ur_queue_handle_t Queue, bool Blocking, uptr Dst, uptr Src, uptr Size, - uint32_t NumEvents, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { - if (Size == 0) { - if (OutEvent) { - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - Queue, NumEvents, EventWaitList, OutEvent)); - } - return UR_RESULT_SUCCESS; - } - - std::vector Events(EventWaitList, - EventWaitList + NumEvents); - UR_CALL(EnqueueMappingShadow(Queue, Src, Size, Events, OutEvent)); - UR_CALL(EnqueueMappingShadow(Queue, Dst, Size, Events, OutEvent)); - - const uptr SrcShadowBegin = MemToShadow(Src); - const uptr SrcShadowEnd = MemToShadow(Src + Size - 1); - assert(SrcShadowBegin <= SrcShadowEnd); - - const uptr DstShadowBegin = MemToShadow(Dst); - const uptr DstShadowEnd = MemToShadow(Dst + Size - 1); - assert(DstShadowBegin <= DstShadowEnd); - - assert(DstShadowEnd - DstShadowBegin == SrcShadowEnd - SrcShadowBegin); - - auto Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( - Queue, Blocking, (void *)DstShadowBegin, (void *)SrcShadowBegin, - SrcShadowEnd - SrcShadowBegin + 1, Events.size(), Events.data(), - OutEvent); - - getContext()->logger.debug("EnqueueCopyShadow(dst={}, src={}, size={}): {}", - (void *)DstShadowBegin, (void *)SrcShadowBegin, - (void *)(SrcShadowEnd - SrcShadowBegin + 1), - Result); - - return Result; -} - ur_result_t MsanShadowMemoryGPU::ReleaseShadow(std::shared_ptr AI) { uptr ShadowBegin = MemToShadow(AI->AllocBegin); diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/source/loader/layers/sanitizer/msan/msan_shadow.hpp index 194f27f60d..1fa5196711 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -38,12 +38,6 @@ struct MsanShadowMemory { const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) = 0; - virtual ur_result_t - EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, - uptr Src, uptr Size, uint32_t NumEvents = 0, - const ur_event_handle_t *EventWaitList = nullptr, - ur_event_handle_t *OutEvent = nullptr) = 0; - virtual ur_result_t ReleaseShadow(std::shared_ptr) { return UR_RESULT_SUCCESS; } @@ -88,12 +82,6 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override; - - ur_result_t - EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, - uptr Src, uptr Size, uint32_t NumEvents = 0, - const ur_event_handle_t *EventWaitList = nullptr, - ur_event_handle_t *OutEvent = nullptr) override; }; struct MsanShadowMemoryGPU : public MsanShadowMemory { @@ -110,19 +98,13 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr) override final; - ur_result_t - EnqueueCopyShadow(ur_queue_handle_t Queue, bool Blocking, uptr Dst, - uptr Src, uptr Size, uint32_t NumEvents = 0, - const ur_event_handle_t *EventWaitList = nullptr, - ur_event_handle_t *OutEvent = nullptr) override final; - ur_result_t ReleaseShadow(std::shared_ptr AI) override final; virtual size_t GetShadowSize() = 0; private: ur_result_t - EnqueueMappingShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + EnqueueMapShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, std::vector &EventWaitList, ur_event_handle_t *OutEvent); From d510a7d9c59256aef8ac4c80b2d522a55147e7e1 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 03:57:03 +0100 Subject: [PATCH 05/15] wip --- source/loader/layers/sanitizer/asan/asan_shadow.cpp | 8 ++++---- source/loader/layers/sanitizer/msan/msan_ddi.cpp | 4 ++-- source/loader/layers/sanitizer/msan/msan_shadow.cpp | 4 ++-- .../layers/sanitizer/sanitizer_common/sanitizer_utils.cpp | 2 +- .../layers/sanitizer/sanitizer_common/sanitizer_utils.hpp | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 145fd232c1..40c1ad57e9 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -223,7 +223,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, + URes = EnqueueUSMMemset(Queue, (void *)MappedPtr, 0, PageSize); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("EnqueueUSMBlockingSet(): {}", @@ -236,7 +236,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } } - auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, + auto URes = EnqueueUSMMemset(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); getContext()->logger.debug( "EnqueuePoisonShadow (addr={}, count={}, value={}): {}", @@ -272,7 +272,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( + ur_result_t URes = EnqueueUSMMemset( Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( @@ -312,7 +312,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMBlockingSet( + ur_result_t URes = EnqueueUSMMemset( Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 3f3227a4e5..7a9b7de99d 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1319,7 +1319,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, + UR_CALL(EnqueueUSMMemset(hQueue, (void *)MemShadow, 0, size, 1, phEvent, phEvent)); } @@ -1375,7 +1375,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, + UR_CALL(EnqueueUSMMemset(hQueue, (void *)DstShadow, 0, size, 1, phEvent, phEvent)); } diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 8af2335779..fdddd8c8f8 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -214,7 +214,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize, + URes = EnqueueUSMMemset(Queue, (void *)MappedPtr, 0, PageSize, EventWaitList.size(), EventWaitList.data(), OutEvent); if (URes != UR_RESULT_SUCCESS) { @@ -261,7 +261,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); - auto Result = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, + auto Result = EnqueueUSMMemset(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1, Events.size(), Events.data(), OutEvent); diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 86c0286c62..c1db0f7484 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -243,7 +243,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, +ur_result_t EnqueueUSMMemset(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 6fcb05894e..57396388a3 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -59,7 +59,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); ur_result_t -EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, +EnqueueUSMMemset(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr); From 4f1951873be4d6cb8651a5aadae1dae0be7d574e Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 03:58:23 +0100 Subject: [PATCH 06/15] wip --- source/loader/layers/sanitizer/asan/asan_shadow.cpp | 8 ++++---- source/loader/layers/sanitizer/msan/msan_ddi.cpp | 4 ++-- source/loader/layers/sanitizer/msan/msan_shadow.cpp | 4 ++-- .../layers/sanitizer/sanitizer_common/sanitizer_utils.cpp | 2 +- .../layers/sanitizer/sanitizer_common/sanitizer_utils.hpp | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 40c1ad57e9..145fd232c1 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -223,7 +223,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMMemset(Queue, (void *)MappedPtr, 0, + URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("EnqueueUSMBlockingSet(): {}", @@ -236,7 +236,7 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, } } - auto URes = EnqueueUSMMemset(Queue, (void *)ShadowBegin, Value, + auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1); getContext()->logger.debug( "EnqueuePoisonShadow (addr={}, count={}, value={}): {}", @@ -272,7 +272,7 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, (void **)&LocalShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMMemset( + ur_result_t URes = EnqueueUSMBlockingSet( Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( @@ -312,7 +312,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, (void **)&PrivateShadowOffset)); // Initialize shadow memory - ur_result_t URes = EnqueueUSMMemset( + ur_result_t URes = EnqueueUSMBlockingSet( Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize); if (URes != UR_RESULT_SUCCESS) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 7a9b7de99d..3f3227a4e5 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1319,7 +1319,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); - UR_CALL(EnqueueUSMMemset(hQueue, (void *)MemShadow, 0, size, 1, + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, phEvent, phEvent)); } @@ -1375,7 +1375,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - UR_CALL(EnqueueUSMMemset(hQueue, (void *)DstShadow, 0, size, 1, + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, phEvent, phEvent)); } diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index fdddd8c8f8..8af2335779 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -214,7 +214,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( (void *)(MappedPtr + PageSize - 1)); // Initialize to zero - URes = EnqueueUSMMemset(Queue, (void *)MappedPtr, 0, PageSize, + URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize, EventWaitList.size(), EventWaitList.data(), OutEvent); if (URes != UR_RESULT_SUCCESS) { @@ -261,7 +261,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( const uptr ShadowEnd = MemToShadow(Ptr + Size - 1); assert(ShadowBegin <= ShadowEnd); - auto Result = EnqueueUSMMemset(Queue, (void *)ShadowBegin, Value, + auto Result = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1, Events.size(), Events.data(), OutEvent); diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index c1db0f7484..86c0286c62 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -243,7 +243,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMMemset(ur_queue_handle_t Queue, void *Ptr, char Value, +ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents, const ur_event_handle_t *EventWaitList, ur_event_handle_t *OutEvent) { diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 57396388a3..6fcb05894e 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -59,7 +59,7 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device); ur_result_t -EnqueueUSMMemset(ur_queue_handle_t Queue, void *Ptr, char Value, +EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, size_t Size, uint32_t NumEvents = 0, const ur_event_handle_t *EventWaitList = nullptr, ur_event_handle_t *OutEvent = nullptr); From ed82004daf1c9d4fc7de23cdcf59fa92dca2b1b6 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 04:02:49 +0100 Subject: [PATCH 07/15] fix format --- .../layers/sanitizer/sanitizer_common/sanitizer_utils.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 86c0286c62..758e81377f 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -243,10 +243,10 @@ size_t GetVirtualMemGranularity(ur_context_handle_t Context, return Size; } -ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value, - size_t Size, uint32_t NumEvents, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *OutEvent) { +ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, + char Value, size_t Size, uint32_t NumEvents, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *OutEvent) { return getContext()->urDdiTable.Enqueue.pfnUSMFill( Queue, Ptr, 1, &Value, Size, NumEvents, EventWaitList, OutEvent); } From 00ce2da2cbfddc812f679d59fb82dc1c26d31fc9 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 05:17:53 +0100 Subject: [PATCH 08/15] wip --- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 19 ++++++++++--------- .../sanitizer/msan/msan_interceptor.cpp | 2 +- .../layers/sanitizer/msan/msan_shadow.cpp | 8 +++----- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 3f3227a4e5..f90ce5eeda 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1319,8 +1319,8 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, - phEvent, phEvent)); + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, + phEvent ? 1 : 0, phEvent, phEvent)); } return UR_RESULT_SUCCESS; @@ -1367,7 +1367,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, - (void *)SrcShadow, size, 1, phEvent, phEvent)); + (void *)SrcShadow, size, phEvent ? 1 : 0, phEvent, + phEvent)); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1375,8 +1376,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, - phEvent, phEvent)); + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, + phEvent ? 1 : 0, phEvent, phEvent)); } return UR_RESULT_SUCCESS; @@ -1427,7 +1428,7 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const char Pattern = 0; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, - width, height, 1, phEvent, phEvent)); + width, height, phEvent ? 1 : 0, phEvent, phEvent)); } return UR_RESULT_SUCCESS; @@ -1481,7 +1482,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, (void *)SrcShadow, srcPitch, width, height, - numEventsInWaitList, phEventWaitList, phEvent)); + phEvent ? 1 : 0, phEvent, phEvent)); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1491,8 +1492,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const char Pattern = 0; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 1, - phEvent, phEvent)); + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, + phEvent ? 1 : 0, phEvent, phEvent)); } return UR_RESULT_SUCCESS; diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 614614ebdd..b9085e8137 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -72,7 +72,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, // For memory release { std::scoped_lock Guard(m_AllocationMapMutex); - m_AllocationMap.emplace(AI->AllocBegin, std::move(AI)); + m_AllocationMap.emplace(AI->AllocBegin, AI); } ManagedQueue Queue(Context, Device); diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 8af2335779..25a9b4d278 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -178,9 +178,6 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( std::vector &EventWaitList, ur_event_handle_t *OutEvent) { - ur_physical_mem_properties_t Desc{UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, - nullptr, 0}; - const size_t PageSize = GetVirtualMemGranularity(Context, Device); const uptr ShadowBegin = MemToShadow(Ptr); @@ -194,7 +191,7 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) { ur_physical_mem_handle_t PhysicalMem{}; auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate( - Context, Device, PageSize, &Desc, &PhysicalMem); + Context, Device, PageSize, nullptr, &PhysicalMem); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("urPhysicalMemCreate(): {}", URes); return URes; @@ -223,7 +220,8 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( } EventWaitList.clear(); - EventWaitList.push_back(*OutEvent); + if (OutEvent) + EventWaitList.push_back(*OutEvent); VirtualMemMaps[MappedPtr].first = PhysicalMem; } From 2469522e96944c3298b6a7cf97e357395a635363 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 05:19:48 +0100 Subject: [PATCH 09/15] fix format --- source/loader/layers/sanitizer/msan/msan_shadow.cpp | 11 ++++++----- source/loader/layers/sanitizer/msan/msan_shadow.hpp | 7 +++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 25a9b4d278..2cdf8600d2 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -212,16 +212,17 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( // Initialize to zero URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0, PageSize, - EventWaitList.size(), EventWaitList.data(), - OutEvent); + EventWaitList.size(), + EventWaitList.data(), OutEvent); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("EnqueueUSMSet(): {}", URes); return URes; } EventWaitList.clear(); - if (OutEvent) + if (OutEvent) { EventWaitList.push_back(*OutEvent); + } VirtualMemMaps[MappedPtr].first = PhysicalMem; } @@ -260,8 +261,8 @@ ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow( assert(ShadowBegin <= ShadowEnd); auto Result = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value, - ShadowEnd - ShadowBegin + 1, Events.size(), - Events.data(), OutEvent); + ShadowEnd - ShadowBegin + 1, + Events.size(), Events.data(), OutEvent); getContext()->logger.debug( "EnqueuePoisonShadow(addr={}, count={}, value={}): {}", diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/source/loader/layers/sanitizer/msan/msan_shadow.hpp index 1fa5196711..ca5791385c 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -103,10 +103,9 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { virtual size_t GetShadowSize() = 0; private: - ur_result_t - EnqueueMapShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, - std::vector &EventWaitList, - ur_event_handle_t *OutEvent); + ur_result_t EnqueueMapShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, + std::vector &EventWaitList, + ur_event_handle_t *OutEvent); std::unordered_map< uptr, std::pair Date: Fri, 3 Jan 2025 06:23:13 +0100 Subject: [PATCH 10/15] fix cpu --- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 63 +++++++++++++------ 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index f90ce5eeda..41e98ec885 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1307,8 +1307,9 @@ ur_result_t UR_APICALL urEnqueueUSMFill( auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; getContext()->logger.debug("==== urEnqueueUSMFill"); + ur_event_handle_t hEvent = nullptr; UR_CALL(pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, phEvent)); + numEventsInWaitList, phEventWaitList, &hEvent)); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1319,8 +1320,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, - phEvent ? 1 : 0, phEvent, phEvent)); + const ur_event_handle_t hEventWait = hEvent; + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, + &hEventWait, &hEvent)); + } + + if (phEvent) { + *phEvent = hEvent; } return UR_RESULT_SUCCESS; @@ -1350,8 +1356,9 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; getContext()->logger.debug("==== pfnUSMMemcpy"); + ur_event_handle_t hEvent = nullptr; UR_CALL(pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, - numEventsInWaitList, phEventWaitList, phEvent)); + numEventsInWaitList, phEventWaitList, &hEvent)); const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); @@ -1366,9 +1373,9 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, - (void *)SrcShadow, size, phEvent ? 1 : 0, phEvent, - phEvent)); + (void *)SrcShadow, size, 1, &hEventWait, &hEvent)); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1376,8 +1383,13 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, - phEvent ? 1 : 0, phEvent, phEvent)); + const ur_event_handle_t hEventWait = hEvent; + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, + &hEventWait, &hEvent)); + } + + if (phEvent) { + *phEvent = hEvent; } return UR_RESULT_SUCCESS; @@ -1413,9 +1425,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( auto pfnUSMFill2D = getContext()->urDdiTable.Enqueue.pfnUSMFill2D; getContext()->logger.debug("==== urEnqueueUSMFill2D"); + ur_event_handle_t hEvent = nullptr; UR_CALL(pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, - phEvent)); + &hEvent)); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1427,8 +1440,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); const char Pattern = 0; + const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, - width, height, phEvent ? 1 : 0, phEvent, phEvent)); + width, height, 1, &hEventWait, &hEvent)); + } + + if (phEvent) { + *phEvent = hEvent; } return UR_RESULT_SUCCESS; @@ -1463,11 +1481,12 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( auto pfnUSMMemcpy2D = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D; getContext()->logger.debug("==== pfnUSMMemcpy2D"); + ur_event_handle_t hEvent = nullptr; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, - phEvent)); + &hEvent)); - auto Src = (uptr)pSrc, Dst = (uptr)pDst; + const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); auto DstInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Dst); @@ -1477,23 +1496,29 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(SrcInfo->Device); - auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); - auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); + const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, - (void *)SrcShadow, srcPitch, width, height, - phEvent ? 1 : 0, phEvent, phEvent)); + (void *)SrcShadow, srcPitch, width, height, 1, + &hEventWait, &hEvent)); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; const auto &DeviceInfo = getMsanInterceptor()->getDeviceInfo(DstInfo->Device); - auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); const char Pattern = 0; + const ur_event_handle_t hEventWait = hEvent; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, - phEvent ? 1 : 0, phEvent, phEvent)); + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 1, + &hEventWait, &hEvent)); + } + + if (phEvent) { + *phEvent = hEvent; } return UR_RESULT_SUCCESS; From 4ac06788d999dd07081d2148e6418a8f94c7570a Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 07:54:39 +0100 Subject: [PATCH 11/15] wip --- source/loader/layers/sanitizer/msan/msan_ddi.cpp | 3 +++ .../loader/layers/sanitizer/msan/msan_interceptor.cpp | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 41e98ec885..8cbdf95051 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1390,7 +1390,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( if (phEvent) { *phEvent = hEvent; + } else { + getContext()->urDdiTable.Event.pfnWait(1, &hEvent); } + // getContext()->urDdiTable.Queue.pfnFinish(hQueue); return UR_RESULT_SUCCESS; } diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index b9085e8137..f13283efb1 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -386,13 +386,13 @@ MsanInterceptor::findAllocInfoByAddress(uptr Address) { std::shared_lock Guard(m_AllocationMapMutex); auto It = m_AllocationMap.upper_bound(Address); if (It == m_AllocationMap.begin()) { - return std::optional{}; + return std::nullopt; } --It; - // Make sure we got the right MsanAllocInfo - assert(Address >= It->second->AllocBegin && - Address < It->second->AllocBegin + It->second->AllocSize && - "Wrong MsanAllocInfo for the address"); + + if (Address < It->second->AllocBegin || Address >= It->second->AllocBegin + It->second->AllocSize) { + return std::nullopt; + } return It; } From c24131cf4c0a20f4dd6fd75fcc4a6f494cdc53d0 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Fri, 3 Jan 2025 09:31:57 +0100 Subject: [PATCH 12/15] wip --- source/loader/layers/sanitizer/msan/msan_ddi.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 8cbdf95051..41e98ec885 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1390,10 +1390,7 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( if (phEvent) { *phEvent = hEvent; - } else { - getContext()->urDdiTable.Event.pfnWait(1, &hEvent); } - // getContext()->urDdiTable.Queue.pfnFinish(hQueue); return UR_RESULT_SUCCESS; } From 05d657cc9f4430e38b5527743c66cb204afc5593 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 6 Jan 2025 07:02:38 +0100 Subject: [PATCH 13/15] fix format --- source/loader/layers/sanitizer/msan/msan_interceptor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index f13283efb1..995c21dbaa 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -390,7 +390,8 @@ MsanInterceptor::findAllocInfoByAddress(uptr Address) { } --It; - if (Address < It->second->AllocBegin || Address >= It->second->AllocBegin + It->second->AllocSize) { + if (Address < It->second->AllocBegin || + Address >= It->second->AllocBegin + It->second->AllocSize) { return std::nullopt; } return It; From e8ea1362f6479c48628a48abd528646cb7017bcd Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 6 Jan 2025 07:06:12 +0100 Subject: [PATCH 14/15] add comment --- source/loader/layers/sanitizer/msan/msan_interceptor.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 995c21dbaa..08010fe170 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -390,10 +390,12 @@ MsanInterceptor::findAllocInfoByAddress(uptr Address) { } --It; + // Since we haven't intercepted all USM APIs, we can't make sure the found AllocInfo is correct. if (Address < It->second->AllocBegin || Address >= It->second->AllocBegin + It->second->AllocSize) { return std::nullopt; } + return It; } From 3eeb2a174148bdf9386521e355e61910a0e1d177 Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Mon, 6 Jan 2025 07:34:37 +0100 Subject: [PATCH 15/15] small optimization --- .../loader/layers/sanitizer/msan/msan_ddi.cpp | 54 +++++++++---------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 41e98ec885..027a02d656 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -1307,9 +1307,9 @@ ur_result_t UR_APICALL urEnqueueUSMFill( auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; getContext()->logger.debug("==== urEnqueueUSMFill"); - ur_event_handle_t hEvent = nullptr; + ur_event_handle_t hEvents[2] = {}; UR_CALL(pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, &hEvent)); + numEventsInWaitList, phEventWaitList, &hEvents[0])); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1320,13 +1320,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); - const ur_event_handle_t hEventWait = hEvent; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 1, - &hEventWait, &hEvent)); + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, + nullptr, &hEvents[1])); } if (phEvent) { - *phEvent = hEvent; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, 2, hEvents, phEvent)); } return UR_RESULT_SUCCESS; @@ -1356,9 +1356,9 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; getContext()->logger.debug("==== pfnUSMMemcpy"); - ur_event_handle_t hEvent = nullptr; + ur_event_handle_t hEvents[2] = {}; UR_CALL(pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, - numEventsInWaitList, phEventWaitList, &hEvent)); + numEventsInWaitList, phEventWaitList, &hEvents[0])); const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); @@ -1373,9 +1373,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, - (void *)SrcShadow, size, 1, &hEventWait, &hEvent)); + (void *)SrcShadow, size, 0, nullptr, &hEvents[1])); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1383,13 +1382,13 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - const ur_event_handle_t hEventWait = hEvent; - UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 1, - &hEventWait, &hEvent)); + UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 0, + nullptr, &hEvents[1])); } if (phEvent) { - *phEvent = hEvent; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, 2, hEvents, phEvent)); } return UR_RESULT_SUCCESS; @@ -1425,10 +1424,10 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( auto pfnUSMFill2D = getContext()->urDdiTable.Enqueue.pfnUSMFill2D; getContext()->logger.debug("==== urEnqueueUSMFill2D"); - ur_event_handle_t hEvent = nullptr; + ur_event_handle_t hEvents[2] = {}; UR_CALL(pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, - &hEvent)); + &hEvents[0])); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1440,13 +1439,13 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); const char Pattern = 0; - const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, - width, height, 1, &hEventWait, &hEvent)); + width, height, 0, nullptr, &hEvents[1])); } if (phEvent) { - *phEvent = hEvent; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, 2, hEvents, phEvent)); } return UR_RESULT_SUCCESS; @@ -1481,10 +1480,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( auto pfnUSMMemcpy2D = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D; getContext()->logger.debug("==== pfnUSMMemcpy2D"); - ur_event_handle_t hEvent = nullptr; + ur_event_handle_t hEvents[2] = {}; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, - &hEvent)); + &hEvents[0])); const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); @@ -1499,10 +1498,9 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); - const ur_event_handle_t hEventWait = hEvent; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, - (void *)SrcShadow, srcPitch, width, height, 1, - &hEventWait, &hEvent)); + (void *)SrcShadow, srcPitch, width, height, 0, + nullptr, &hEvents[1])); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1511,14 +1509,14 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); const char Pattern = 0; - const ur_event_handle_t hEventWait = hEvent; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( - hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 1, - &hEventWait, &hEvent)); + hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, + nullptr, &hEvents[1])); } if (phEvent) { - *phEvent = hEvent; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, 2, hEvents, phEvent)); } return UR_RESULT_SUCCESS;