Skip to content

Commit

Permalink
Revert "SWDEV-301667 - Disable HostBlit copy for HIP"
Browse files Browse the repository at this point in the history
This reverts commit 5447cf8.

Reason for revert: SWDEV-455075, SWDEV-461507 - The reverted change forced
the use of ROCr's copy path. This revert reintroduces the HostBlit copy path
for host-to-host copies.


Change-Id: Ic3c45b49e481c9dcdaa7611f61071778790b7e6c
  • Loading branch information
Sourabh Betigeri authored and Sourabh Betigeri committed May 20, 2024
1 parent 3ff0366 commit 294fa6d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 29 deletions.
1 change: 0 additions & 1 deletion rocclr/device/blit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ class BlitManager : public amd::HeapObject {
uint disableFillImage_ : 1;
uint disableCopyBufferToImageOpt_ : 1;
uint disableHwlCopyBuffer_ : 1;
uint disableHostCopyBuffer_ : 1;
};
uint32_t value_;
Setup() : value_(0) {}
Expand Down
45 changes: 19 additions & 26 deletions rocclr/device/rocm/rocblit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,

// Use host copy if memory has direct access
if (setup_.disableReadBuffer_ ||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
!setup_.disableHostCopyBuffer_)) {
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().Barriers().WaitCurrent();
return HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
Expand Down Expand Up @@ -166,12 +165,10 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,

// Use host copy if memory has direct access
if (setup_.disableReadBufferRect_ ||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
!setup_.disableHostCopyBuffer_)) {
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().Barriers().WaitCurrent();
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size,
entire, copyMetadata);
return HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire, copyMetadata);
} else {
Memory& xferBuf = dev().xferRead().acquire();
address staging = xferBuf.getDeviceMemory();
Expand Down Expand Up @@ -239,8 +236,8 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire, amd::CopyMetadata copyMetadata) const {
// Use host copy if memory has direct access
if ((setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) {
if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
Expand Down Expand Up @@ -336,8 +333,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
gpu().releaseGpuMemoryFence();

// Use host copy if memory has direct access
if ((setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) {
if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
copyMetadata);
} else {
Expand Down Expand Up @@ -393,9 +390,8 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBuffer_ ||
(!setup_.disableHostCopyBuffer_ && srcMemory.isHostMemDirectAccess() &&
!srcMemory.isCpuUncached() && (dev().agent_profile() != HSA_PROFILE_FULL) &&
dstMemory.isHostMemDirectAccess())) {
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
(dev().agent_profile() != HSA_PROFILE_FULL) && dstMemory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size, false,
Expand All @@ -413,8 +409,8 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
const amd::Coord3D& size, bool entire,
amd::CopyMetadata copyMetadata) const {
if (setup_.disableCopyBufferRect_ ||
(!setup_.disableHostCopyBuffer_ && srcMemory.isHostMemDirectAccess() &&
!srcMemory.isCpuUncached() && dstMemory.isHostMemDirectAccess())) {
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
dstMemory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
return HostBlitManager::copyBufferRect(srcMemory, dstMemory, srcRect, dstRect, size, entire,
Expand Down Expand Up @@ -1843,7 +1839,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,

// Use host copy if memory has direct access
if (setup_.disableReadBuffer_ || (srcMemory.isHostMemDirectAccess() &&
!srcMemory.isCpuUncached() && !setup_.disableHostCopyBuffer_)) {
!srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire, copyMetadata);
Expand Down Expand Up @@ -1895,8 +1891,7 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,

// Use host copy if memory has direct access
if (setup_.disableReadBufferRect_ ||
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached() &&
!setup_.disableHostCopyBuffer_)) {
(srcMemory.isHostMemDirectAccess() && !srcMemory.isCpuUncached())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire,
Expand Down Expand Up @@ -1946,8 +1941,8 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
bool result = false;

// Use host copy if memory has direct access
if ((setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) {
if (setup_.disableWriteBuffer_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire, copyMetadata);
Expand Down Expand Up @@ -1998,8 +1993,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
bool result = false;

// Use host copy if memory has direct access
if ((setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) && !setup_.disableHostCopyBuffer_) {
if (setup_.disableWriteBufferRect_ || dstMemory.isHostMemDirectAccess() ||
gpuMem(dstMemory).IsPersistentDirectMap()) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire,
Expand Down Expand Up @@ -2080,8 +2075,7 @@ bool KernelBlitManager::fillBuffer1D(device::Memory& memory, const void* pattern
bool result = false;

// Use host fill if memory has direct access
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess() &&
!setup_.disableHostCopyBuffer_)) {
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire);
Expand Down Expand Up @@ -2161,8 +2155,7 @@ bool KernelBlitManager::fillBuffer2D(device::Memory& memory, const void* pattern
bool result = false;

// Use host fill if memory has direct access
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess() &&
!setup_.disableHostCopyBuffer_)) {
if (setup_.disableFillBuffer_ || (!forceBlit && memory.isHostMemDirectAccess())) {
// Stall GPU before CPU access
gpu().releaseGpuMemoryFence();
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, size, origin, size, entire);
Expand Down
2 changes: 0 additions & 2 deletions rocclr/device/rocm/rocvirtual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,6 @@ bool VirtualGPU::create() {
}

device::BlitManager::Setup blitSetup;
// Disable HostBlit copy path for HIP
blitSetup.disableHostCopyBuffer_ = amd::IS_HIP;
blitMgr_ = new KernelBlitManager(*this, blitSetup);
if ((nullptr == blitMgr_) || !blitMgr_->create(roc_device_)) {
LogError("Could not create BlitManager!");
Expand Down

0 comments on commit 294fa6d

Please sign in to comment.