From ccd065214094837dd59a45aa5111d860aff38ecf Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Tue, 10 Jan 2023 22:37:15 -0800 Subject: [PATCH] SWDEV-345213 - Fix staged line-by-line copy path - Address an old bug in offset calculation that was causing out-of-bounds access. - Improve logging Change-Id: Iebdf34dddaa5e987cc72184a2152918adc6a96e0 (cherry picked from commit cfa12d9ebc9d8d82f342e5215e1363b5e70f7cbe) --- device/rocm/rocblit.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/device/rocm/rocblit.cpp b/device/rocm/rocblit.cpp index b759a479..32c34000 100644 --- a/device/rocm/rocblit.cpp +++ b/device/rocm/rocblit.cpp @@ -175,8 +175,7 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost, // Copy data from device to host - line by line address dst = reinterpret_cast<address>
(dstHost) + dstOffset; - src += srcOffset; - bool retval = hsaCopyStaged(src, dst, size[0], staging, false); + bool retval = hsaCopyStaged(src + srcOffset, dst, size[0], staging, false); if (!retval) { return retval; } @@ -336,9 +335,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem dstOffset = bufRect.offset(0, y, z); // Copy data from host to device - line by line - dst += dstOffset; const_address src = reinterpret_cast<const_address>(srcHost) + srcOffset; - bool retval = hsaCopyStaged(src, dst, size[0], staging, true); + bool retval = hsaCopyStaged(src, dst + dstOffset, size[0], staging, true); if (!retval) { return retval; } @@ -664,8 +662,9 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, // Use SDMA to transfer the data ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, - "HSA Asycn Copy wait_event=0x%zx, completion_signal=0x%zx", - (wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle); + "HSA Asycn Copy dst=0x%zx, src=0x%zx, size=%d, wait_event=0x%zx, " + "completion_signal=0x%zx", + dst, src, (wait_events.size() != 0) ? 
wait_events[0].handle : 0, active.handle); status = hsa_amd_memory_async_copy(dst, dstAgent, src, srcAgent, size[0], wait_events.size(), wait_events.data(), active); @@ -720,10 +719,12 @@ bool DmaBlitManager::hsaCopyStaged(const_address hostSrc, address hostDst, size_ hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp()); memcpy(hsaBuffer, hostSrc + offset, size); - ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, - "HSA Async Copy completion_signal=0x%zx", active.handle); status = hsa_amd_memory_async_copy(hostDst + offset, dev().getBackendDevice(), hsaBuffer, srcAgent, size, 0, nullptr, active); + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, + "HSA Async Copy staged H2D dst=0x%zx, src=0x%zx, size=%ld, completion_signal=0x%zx", + hostDst + offset, hsaBuffer, size, active.handle); + if (status != HSA_STATUS_SUCCESS) { gpu().Barriers().ResetCurrentSignal(); LogPrintfError("Hsa copy from host to device failed with code %d", status); @@ -749,10 +750,12 @@ bool DmaBlitManager::hsaCopyStaged(const_address hostSrc, address hostDst, size_ hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp()); // Copy data from Device to Host - ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, - "HSA Async Copy completion_signal=0x%zx", active.handle); status = hsa_amd_memory_async_copy(hsaBuffer, dstAgent, hostSrc + offset, dev().getBackendDevice(), size, 0, nullptr, active); + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, + "HSA Async Copy staged D2H dst=0x%zx, src=0x%zx, size=%ld, completion_signal=0x%zx", + hsaBuffer, hostSrc + offset, size, active.handle); + if (status == HSA_STATUS_SUCCESS) { gpu().Barriers().WaitCurrent(); memcpy(hostDst + offset, hsaBuffer, size);