From 294fa6de16d41e51d467cf2da75851c9aa7dc683 Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Thu, 16 May 2024 19:10:25 -0400 Subject: [PATCH] Revert "SWDEV-301667 - Disable HostBlit copy for HIP" This reverts commit 5447cf887254367c024716cd6c27822cfd9a2d73. Reason for revert: SWDEV-455075, SWDEV-461507 - The reverted change forces the use of ROCr's copy path. This revert reintroduces the hostBlit copy path for host-to-host copies. Change-Id: Ic3c45b49e481c9dcdaa7611f61071778790b7e6c --- rocclr/device/blit.hpp | 1 - rocclr/device/rocm/rocblit.cpp | 45 +++++++++++++------------------ rocclr/device/rocm/rocvirtual.cpp | 2 -- 3 files changed, 19 insertions(+), 29 deletions(-) diff --git a/rocclr/device/blit.hpp b/rocclr/device/blit.hpp index e5cf93526..eb70a87be 100644 --- a/rocclr/device/blit.hpp +++ b/rocclr/device/blit.hpp @@ -53,7 +53,6 @@ class BlitManager : public amd::HeapObject { uint disableFillImage_ : 1; uint disableCopyBufferToImageOpt_ : 1; uint disableHwlCopyBuffer_ : 1; - uint disableHostCopyBuffer_ : 1; }; uint32_t value_; Setup() : value_(0) {} diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index ddb458a05..74bcdbd65 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -72,8 +72,7 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, // Use host copy if memory has direct access if (setup_.disableReadBuffer_ || - (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && - !setup_.disableHostCopyBuffer_)) { + (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().Barriers().WaitCurrent(); return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); @@ -166,12 +165,10 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, // Use host copy if memory has direct access if (setup_.disableReadBufferRect_ || - (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && -
!setup_.disableHostCopyBuffer_)) { + (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().Barriers().WaitCurrent(); - return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, - entire, copyMetadata); + return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, copyMetadata); } else { Memory& xferBuf = dev().xferRead().acquire(); address staging = xferBuf.getDeviceMemory(); @@ -239,8 +236,8 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory, const amd::Coord3D& origin, const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const { // Use host copy if memory has direct access - if ((setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() || - gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) { + if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() || + gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); @@ -336,8 +333,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem gpu().releaseGpuMemoryFence(); // Use host copy if memory has direct access - if ((setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() || - gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) { + if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() || + gpuMem(dstMemory).IsPersistentDirectMap()) { return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, copyMetadata); } else { @@ -393,9 +390,8 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBuffer_ || - (!setup_.disableHostCopyBuffer_ 
&& srcMemory.isHostMemDirectAccess() && - !srcMemory.isCpuUncached() && (dev().agent_profile() != HSA_PROFILE_FULL) && - dstMemory.isHostMemDirectAccess())) { + (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && + (dev().agent_profile() != HSA_PROFILE_FULL) && dstMemory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false, @@ -413,8 +409,8 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d const amd::Coord3D& size, bool entire, amd::CopyMetadata copyMetadata) const { if (setup_.disableCopyBufferRect_ || - (!setup_.disableHostCopyBuffer_ && srcMemory.isHostMemDirectAccess() && - !srcMemory.isCpuUncached() && dstMemory.isHostMemDirectAccess())) { + (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && + dstMemory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire, @@ -1843,7 +1839,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, // Use host copy if memory has direct access if (setup_.disableReadBuffer_ || (srcMemory.isHostMemDirectAccess() && - !srcMemory.isCpuUncached() && !setup_.disableHostCopyBuffer_)) { + !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata); @@ -1895,8 +1891,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, // Use host copy if memory has direct access if (setup_.disableReadBufferRect_ || - (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() && - !setup_.disableHostCopyBuffer_)) { + (srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result 
= HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, @@ -1946,8 +1941,8 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo bool result = false; // Use host copy if memory has direct access - if ((setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() || - gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) { + if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() || + gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata); @@ -1998,8 +1993,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst bool result = false; // Use host copy if memory has direct access - if ((setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() || - gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) { + if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() || + gpuMem(dstMemory).IsPersistentDirectMap()) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire, @@ -2080,8 +2075,7 @@ bool KernelBlitManager::fillBuffer1D(device::Memory& memory, const void* pattern bool result = false; // Use host fill if memory has direct access - if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess() && - !setup_.disableHostCopyBuffer_)) { + if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire); @@ -2161,8 +2155,7 @@ bool KernelBlitManager::fillBuffer2D(device::Memory& memory, const void* pattern bool result = false; // Use host fill if memory has direct 
access - if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess() && - !setup_.disableHostCopyBuffer_)) { + if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire); diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 24d8e7c4e..c56d79678 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -1329,8 +1329,6 @@ bool VirtualGPU::create() { } device::BlitManager::Setup blitSetup; - // Disable HostBlit copy path for HIP - blitSetup.disableHostCopyBuffer_ = amd::IS_HIP; blitMgr_ = new KernelBlitManager(*this, blitSetup); if ((nullptr == blitMgr_) || !blitMgr_->create(roc_device_)) { LogError("Could not create BlitManager!");