Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[L0 v2][CTS] Fix problems reported by SYCL e2e tests #2516

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
8 changes: 8 additions & 0 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2293,6 +2293,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
IsInOrderList = true;
}

logger::debug(
"create command list ordinal: {}, type: regular, device: {}, inOrder: {}",
QueueGroupOrdinal, Device->ZeDevice, IsInOrderList);

ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

Expand Down Expand Up @@ -2459,6 +2463,10 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {
"(round robin in [{}, {}]) priority = {}",
ZeCommandQueueDesc.ordinal, ZeCommandQueueDesc.index,
LowerIndex, UpperIndex, Priority);
logger::debug("create command list ordinal: {}, type: immediate, device: "
"{}, inOrder: {}",
ZeCommandQueueDesc.ordinal, Queue->Device->ZeDevice,
isInOrderList);

ZE_CALL_NOCHECK(zeCommandListCreateImmediate,
(Queue->Context->ZeContext, Queue->Device->ZeDevice,
Expand Down
11 changes: 0 additions & 11 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,15 +474,4 @@ ur_result_t urCommandBufferCommandGetInfoExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the experimental USM import entry point. None of the
// arguments is inspected: the call is logged as an error that names this
// function and is then rejected with UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem,
                           size_t size) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}

// Placeholder for the experimental USM release entry point. Neither argument
// is inspected: the call is logged as an error that names this function and
// is then rejected with UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) {
  const ur_result_t notSupported = UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  logger::error("{} function not implemented!", __FUNCTION__);
  return notSupported;
}

} // namespace ur::level_zero
11 changes: 11 additions & 0 deletions source/adapters/level_zero/v2/command_list_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
QueueDesc.index = ImmCmdDesc->Index.value();
}
QueueDesc.pNext = &offloadDesc;

logger::debug("create command list ordinal: {}, type: immediate, device: "
"{}, inOrder: {}",
ImmCmdDesc->Ordinal, ImmCmdDesc->ZeDevice,
ImmCmdDesc->IsInOrder);

ZE2UR_CALL_THROWS(
zeCommandListCreateImmediate,
(ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList));
Expand All @@ -81,6 +87,11 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) {
CmdListDesc.commandQueueGroupOrdinal = RegCmdDesc.Ordinal;
CmdListDesc.pNext = &offloadDesc;

// This branch builds the list with zeCommandListCreate (a regular, i.e.
// non-immediate, command list), so the trace must say "regular"; the
// immediate branch logs its own message before
// zeCommandListCreateImmediate.
logger::debug("create command list ordinal: {}, type: regular, device: "
              "{}, inOrder: {}",
              RegCmdDesc.Ordinal, RegCmdDesc.ZeDevice,
              RegCmdDesc.IsInOrder);

ze_command_list_handle_t ZeCommandList;
ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice,
&CmdListDesc, &ZeCommandList));
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/level_zero/v2/event_provider_normal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ provider_pool::provider_pool(ur_context_handle_t context, queue_type queue,
devices.push_back(d->ZeDevice);
}

logger::debug("ze_event_pool_desc_t flags set to: {}", desc.flags);

ZE2UR_CALL_THROWS(zeEventPoolCreate,
(context->getZeHandle(), &desc, devices.size(),
devices.data(), pool.ptr()));
Expand Down
8 changes: 6 additions & 2 deletions source/adapters/level_zero/v2/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,12 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission(
(hZeKernel, groupSizeX, groupSizeY, groupSizeZ));

for (auto &pending : pending_allocations) {
auto zePtr = pending.hMem->getDevicePtr(hDevice, pending.mode, 0,
pending.hMem->getSize(), migrate);
void *zePtr = nullptr;
if (pending.hMem) {
// NULL is a valid value
zePtr = pending.hMem->getDevicePtr(hDevice, pending.mode, 0,
pending.hMem->getSize(), migrate);
}
UR_CALL(setArgPointer(pending.argIndex, nullptr, zePtr));
}
pending_allocations.clear();
Expand Down
106 changes: 54 additions & 52 deletions source/adapters/level_zero/v2/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,7 @@ ur_integrated_mem_handle_t::ur_integrated_mem_handle_t(
if (!ownHostPtr) {
return;
}
auto ret = hContext->getDefaultUSMPool()->free(ptr);
if (ret != UR_RESULT_SUCCESS) {
logger::error("Failed to free host memory: {}", ret);
}
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
});
}

Expand Down Expand Up @@ -209,7 +206,7 @@ ur_discrete_mem_handle_t::ur_discrete_mem_handle_t(
device_access_mode_t accessMode)
: ur_mem_handle_t_(hContext, size, accessMode),
deviceAllocations(hContext->getPlatform()->getNumDevices()),
activeAllocationDevice(nullptr), hostAllocations() {
activeAllocationDevice(nullptr), mapToPtr(hostPtr), hostAllocations() {
if (hostPtr) {
auto initialDevice = hContext->getDevices()[0];
UR_CALL_THROWS(migrateBufferTo(initialDevice, hostPtr, size));
Expand All @@ -234,10 +231,7 @@ ur_discrete_mem_handle_t::ur_discrete_mem_handle_t(
if (!ownZePtr) {
return;
}
auto ret = hContext->getDefaultUSMPool()->free(ptr);
if (ret != UR_RESULT_SUCCESS) {
logger::error("Failed to free device memory: {}", ret);
}
ZE_CALL_NOCHECK(zeMemFree, (hContext->getZeHandle(), ptr));
});
}
}
Expand All @@ -246,12 +240,18 @@ ur_discrete_mem_handle_t::~ur_discrete_mem_handle_t() {
if (!activeAllocationDevice || !writeBackPtr)
return;

auto srcPtr = ur_cast<char *>(
deviceAllocations[activeAllocationDevice->Id.value()].get());
auto srcPtr = getActiveDeviceAlloc();
synchronousZeCopy(hContext, activeAllocationDevice, writeBackPtr, srcPtr,
getSize());
}

// Returns the address `offset` bytes into the entry of deviceAllocations
// indexed by the active allocation device's Id. Callers must only use this
// when activeAllocationDevice is set; that precondition is asserted.
void *ur_discrete_mem_handle_t::getActiveDeviceAlloc(size_t offset) {
  assert(activeAllocationDevice);

  const auto activeDeviceId = activeAllocationDevice->Id.value();
  auto *allocationBase =
      ur_cast<char *>(deviceAllocations[activeDeviceId].get());
  return allocationBase + offset;
}

void *ur_discrete_mem_handle_t::getDevicePtr(
ur_device_handle_t hDevice, device_access_mode_t access, size_t offset,
size_t size, std::function<void(void *src, void *dst, size_t)> migrate) {
Expand All @@ -272,10 +272,8 @@ void *ur_discrete_mem_handle_t::getDevicePtr(
hDevice = activeAllocationDevice;
}

char *ptr;
if (activeAllocationDevice == hDevice) {
ptr = ur_cast<char *>(deviceAllocations[hDevice->Id.value()].get());
return ptr + offset;
return getActiveDeviceAlloc(offset);
}

auto &p2pDevices = hContext->getP2PDevices(hDevice);
Expand All @@ -288,9 +286,7 @@ void *ur_discrete_mem_handle_t::getDevicePtr(
}

// TODO: see if it's better to migrate the memory to the specified device
return ur_cast<char *>(
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
offset;
return getActiveDeviceAlloc(offset);
}

void *ur_discrete_mem_handle_t::mapHostPtr(
Expand All @@ -299,55 +295,63 @@ void *ur_discrete_mem_handle_t::mapHostPtr(
TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::mapHostPtr");
// TODO: use async alloc?

void *ptr;
UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate(
hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr));
void *ptr = mapToPtr;
if (!ptr) {
UR_CALL_THROWS(hContext->getDefaultUSMPool()->allocate(
hContext, nullptr, nullptr, UR_USM_TYPE_HOST, size, &ptr));
}

hostAllocations.emplace_back(ptr, size, offset, flags);
// Wrap the mapped pointer so it is cleaned up when its host allocation entry
// is destroyed. The mapping owns the memory only when it was allocated from
// the default USM pool, i.e. when there is no mapToPtr to map onto. A
// non-null mapToPtr was supplied from outside this function and must not be
// handed to the pool's free(), so the deleter is a no-op in that case.
usm_unique_ptr_t mappedPtr =
    usm_unique_ptr_t(ptr, [ownsAlloc = !mapToPtr, this](void *p) {
      if (ownsAlloc) {
        auto ret = hContext->getDefaultUSMPool()->free(p);
        if (ret != UR_RESULT_SUCCESS) {
          // Freeing is best-effort here: report the failure, do not throw.
          logger::error("Failed to free mapped memory: {}", ret);
        }
      }
    });

hostAllocations.emplace_back(std::move(mappedPtr), size, offset, flags);

if (activeAllocationDevice && (flags & UR_MAP_FLAG_READ)) {
auto srcPtr =
ur_cast<char *>(
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
offset;
migrate(srcPtr, hostAllocations.back().ptr, size);
auto srcPtr = getActiveDeviceAlloc(offset);
migrate(srcPtr, hostAllocations.back().ptr.get(), size);
}

return hostAllocations.back().ptr;
return hostAllocations.back().ptr.get();
}

void ur_discrete_mem_handle_t::unmapHostPtr(
void *pMappedPtr,
std::function<void(void *src, void *dst, size_t)> migrate) {
TRACK_SCOPE_LATENCY("ur_discrete_mem_handle_t::unmapHostPtr");

for (auto &hostAllocation : hostAllocations) {
if (hostAllocation.ptr == pMappedPtr) {
void *devicePtr = nullptr;
if (activeAllocationDevice) {
devicePtr =
ur_cast<char *>(
deviceAllocations[activeAllocationDevice->Id.value()].get()) +
hostAllocation.offset;
} else if (!(hostAllocation.flags &
UR_MAP_FLAG_WRITE_INVALIDATE_REGION)) {
devicePtr = ur_cast<char *>(getDevicePtr(
hContext->getDevices()[0], device_access_mode_t::read_only,
hostAllocation.offset, hostAllocation.size, migrate));
}
auto hostAlloc =
std::find_if(hostAllocations.begin(), hostAllocations.end(),
[pMappedPtr](const host_allocation_desc_t &desc) {
return desc.ptr.get() == pMappedPtr;
});

if (devicePtr) {
migrate(hostAllocation.ptr, devicePtr, hostAllocation.size);
}
if (hostAlloc == hostAllocations.end()) {
throw UR_RESULT_ERROR_INVALID_ARGUMENT;
}

// TODO: use async free here?
UR_CALL_THROWS(hContext->getDefaultUSMPool()->free(hostAllocation.ptr));
return;
}
bool shouldMigrateToDevice =
!(hostAlloc->flags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION);

if (!activeAllocationDevice && shouldMigrateToDevice) {
allocateOnDevice(hContext->getDevices()[0], getSize());
}

// TODO: tests require that memory is migrated even for
// UR_MAP_FLAG_WRITE_INVALIDATE_REGION when there is an active device
// allocation. is this correct?
if (activeAllocationDevice) {
migrate(hostAlloc->ptr.get(), getActiveDeviceAlloc(hostAlloc->offset),
hostAlloc->size);
}

// No mapping found
throw UR_RESULT_ERROR_INVALID_ARGUMENT;
hostAllocations.erase(hostAlloc);
}

static bool useHostBuffer(ur_context_handle_t hContext) {
Expand Down Expand Up @@ -419,8 +423,6 @@ ur_result_t urMemBufferCreate(ur_context_handle_t hContext,
auto accessMode = getDeviceAccessMode(flags);

if (useHostBuffer(hContext)) {
// TODO: assert that if hostPtr is set, either UR_MEM_FLAG_USE_HOST_POINTER
// or UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER is set?
auto hostPtrAction =
flags & UR_MEM_FLAG_USE_HOST_POINTER
? ur_integrated_mem_handle_t::host_ptr_action_t::import
Expand Down
11 changes: 7 additions & 4 deletions source/adapters/level_zero/v2/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ struct ur_integrated_mem_handle_t : public ur_mem_handle_t_ {
};

struct host_allocation_desc_t {
host_allocation_desc_t(void *ptr, size_t size, size_t offset,
host_allocation_desc_t(usm_unique_ptr_t ptr, size_t size, size_t offset,
ur_map_flags_t flags)
: ptr(ptr), size(size), offset(offset), flags(flags) {}
: ptr(std::move(ptr)), size(size), offset(offset), flags(flags) {}

void *ptr;
usm_unique_ptr_t ptr;
size_t size;
size_t offset;
ur_map_flags_t flags;
Expand Down Expand Up @@ -146,10 +146,13 @@ struct ur_discrete_mem_handle_t : public ur_mem_handle_t_ {
// If not null, copy the buffer content back to this memory on release.
void *writeBackPtr = nullptr;

// If not null, mapHostPtr should map memory to this ptr
void *mapToPtr = nullptr;

std::vector<host_allocation_desc_t> hostAllocations;

void *getActiveDeviceAlloc(size_t offset = 0);
void *allocateOnDevice(ur_device_handle_t hDevice, size_t size);

ur_result_t migrateBufferTo(ur_device_handle_t hDevice, void *src,
size_t size);
};
Expand Down
45 changes: 28 additions & 17 deletions source/adapters/level_zero/v2/queue_immediate_in_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,15 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
case UR_QUEUE_INFO_DEVICE_DEFAULT:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
case UR_QUEUE_INFO_EMPTY: {
// We can't tell if the queue is empty as we don't hold to any events
return ReturnValue(false);
auto status = ZE_CALL_NOCHECK(zeCommandListHostSynchronize,
(handler.commandList.get(), 0));
if (status == ZE_RESULT_SUCCESS) {
return ReturnValue(true);
} else if (status == ZE_RESULT_NOT_READY) {
return ReturnValue(false);
} else {
return ze2urResult(status);
}
}
default:
logger::error("Unsupported ParamName in urQueueGetInfo: "
Expand Down Expand Up @@ -660,10 +667,11 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
// If memory was not migrated, we need to wait on the events here.
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(handler.commandList.get(), waitList.second, waitList.first));
if (signalEvent) {
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(handler.commandList.get(), signalEvent->getZeEvent()));
}
}

if (signalEvent) {
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(handler.commandList.get(), signalEvent->getZeEvent()));
}

if (blockingMap) {
Expand Down Expand Up @@ -872,17 +880,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D(
bool blocking, void *pDst, size_t dstPitch, const void *pSrc,
size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
std::ignore = blocking;
std::ignore = pDst;
std::ignore = dstPitch;
std::ignore = pSrc;
std::ignore = srcPitch;
std::ignore = width;
std::ignore = height;
std::ignore = numEventsInWaitList;
std::ignore = phEventWaitList;
std::ignore = phEvent;
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy2D");

ur_rect_offset_t zeroOffset{0, 0, 0};
ur_rect_region_t region{width, height, 0};

std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

ur_usm_handle_t_ srcHandle(hContext, 0, pSrc);
ur_usm_handle_t_ dstHandle(hContext, 0, pDst);

return enqueueRegionCopyUnlocked(&srcHandle, &dstHandle, blocking, zeroOffset,
zeroOffset, region, srcPitch, 0, dstPitch, 0,
numEventsInWaitList, phEventWaitList,
phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT);
}

static void *getGlobalPointerFromModule(ze_module_handle_t hModule,
Expand Down
Loading
Loading