diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 1decaa92ca..fad7f5f8fa 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -8,15 +8,10 @@ // //===----------------------------------------------------------------------===// +#include "adapter.hpp" #include "common.hpp" #include "logger/ur_logger.hpp" -struct ur_adapter_handle_t_ { - std::atomic RefCount = 0; - std::mutex Mutex; - logger::Logger &log = logger::get_logger("opencl"); -}; - static ur_adapter_handle_t_ *adapter = nullptr; static void globalAdapterShutdown() { diff --git a/source/adapters/opencl/adapter.hpp b/source/adapters/opencl/adapter.hpp index 27a45b0af8..4951cd9bae 100644 --- a/source/adapters/opencl/adapter.hpp +++ b/source/adapters/opencl/adapter.hpp @@ -7,7 +7,14 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "logger/ur_logger.hpp" +#include "platform.hpp" -struct ur_adapter_handle_t_; +struct ur_adapter_handle_t_ { + std::atomic RefCount = 0; + std::mutex Mutex; + logger::Logger &log = logger::get_logger("opencl"); -extern ur_adapter_handle_t_ adapter; + std::vector> URPlatforms; + uint32_t NumPlatforms = 0; +}; \ No newline at end of file diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 8cf26b949a..195d9cdf5e 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -46,7 +46,7 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { urQueueRelease(hInternalQueue); - cl_context CLContext = hContext->get(); + cl_context CLContext = hContext->CLContext; cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -66,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( ur_queue_handle_t Queue = nullptr; UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue)); - cl_context CLContext = hContext->get(); + cl_context CLContext = hContext->CLContext; cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -77,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; - cl_device_id CLDevice = hDevice->get(); + cl_device_id CLDevice = hDevice->CLDevice; CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities)); bool DeviceSupportsUpdate = UpdateCapabilities > 0; @@ -91,7 +91,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; cl_int Res = CL_SUCCESS; - const cl_command_queue CLQueue = Queue->get(); + const cl_command_queue CLQueue = Queue->CLQueue; auto CLCommandBuffer = clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); @@ -132,7 +132,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -160,7 +160,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( (void)phEventWaitList; (void)phEvent; - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -182,7 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( cl_command_properties_khr *Properties = hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->get(), + hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, OutCommandHandle)); @@ -246,7 +246,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( (void)phEventWaitList; (void)phEvent; (void)phCommand; - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -254,8 +254,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(), - hDstMem->get(), srcOffset, dstOffset, size, numSyncPointsInWaitList, + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory, + hDstMem->CLMemory, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; @@ -283,7 +283,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z}; size_t OpenCLRegion[3]{region.width, region.height, region.depth}; - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -291,8 +291,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(), - hDstMem->get(), OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory, + hDstMem->CLMemory, OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -386,7 +386,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( [[maybe_unused]] ur_event_handle_t *phEvent, [[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) { - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -394,7 +394,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( cl_ext::CommandFillBufferName, &clCommandFillBufferKHR)); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->get(), + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->CLMemory, pPattern, patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); @@ -452,7 +452,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -463,9 +463,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - cl_command_queue CLQueue = hQueue->get(); + cl_command_queue CLQueue = hQueue->CLQueue; CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR( NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer, numEventsInWaitList, CLWaitEvents.data(), &Event)); @@ -532,7 +532,7 @@ void updateKernelArgs(std::vector &CLArgs, for (uint32_t i = 0; i < NumMemobjArgs; i++) { const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = ArgMemobjList[i]; - cl_mem arg_value = URMemObjArg.hNewMemObjArg->get(); + cl_mem arg_value = URMemObjArg.hNewMemObjArg->CLMemory; cl_mutable_dispatch_arg_khr CLArg{ URMemObjArg.argIndex, // arg_index sizeof(cl_mem), // arg_size @@ -567,7 +567,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; - cl_context CLContext = hCommandBuffer->hContext->get(); + cl_context CLContext = hCommandBuffer->hContext->CLContext; cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; UR_RETURN_ON_FAILURE( diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 27bb3cf7ba..8a019c84fc 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -14,6 +14,53 @@ #include #include +ur_result_t +ur_context_handle_t_::makeWithNative(native_type Ctx, uint32_t DevCount, + const ur_device_handle_t *phDevices, + ur_context_handle_t &Context) { + try { + uint32_t CLDeviceCount; + CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, + sizeof(CLDeviceCount), &CLDeviceCount, + nullptr)); + std::vector CLDevices(CLDeviceCount); + CL_RETURN_ON_FAILURE(clGetContextInfo( + Ctx, CL_CONTEXT_DEVICES, sizeof(CLDevices), CLDevices.data(), nullptr)); + std::vector URDevices; + if (DevCount) { + if (DevCount != CLDeviceCount) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + for (uint32_t i = 0; i < DevCount; i++) { + if (phDevices[i]->CLDevice != CLDevices[i]) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + URDevices.push_back(phDevices[i]); + } + } else { + DevCount = CLDeviceCount; + for (uint32_t i = 0; i < CLDeviceCount; i++) { + ur_device_handle_t UrDevice = nullptr; + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLDevices[i]); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + hNativeHandle, nullptr, nullptr, &UrDevice)); + URDevices.push_back(UrDevice); + } + } + + auto URContext = + std::make_unique(Ctx, DevCount, URDevices.data()); + Context = URContext.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( uint32_t DeviceCount, const ur_device_handle_t *phDevices, const ur_context_properties_t *, ur_context_handle_t *phContext) { @@ -21,7 +68,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( cl_int Ret; std::vector CLDevices(DeviceCount); for (size_t i = 0; i < DeviceCount; i++) { - CLDevices[i] = phDevices[i]->get(); + CLDevices[i] = phDevices[i]->CLDevice; } try { @@ -82,7 +129,7 @@ urContextRelease(ur_context_handle_t hContext) { // should drastically reduce the chances of the pathological case described // in the comments in common.hpp. static std::mutex contextReleaseMutex; - auto clContext = hContext->get(); + auto clContext = hContext->CLContext; std::lock_guard lock(contextReleaseMutex); size_t refCount = hContext->getReferenceCount(); @@ -108,7 +155,7 @@ urContextRetain(ur_context_handle_t hContext) { UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { - *phNativeContext = reinterpret_cast(hContext->get()); + *phNativeContext = reinterpret_cast(hContext->CLContext); return UR_RESULT_SUCCESS; } @@ -172,8 +219,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( auto *C = static_cast(pUserData); C->execute(); }; - CL_RETURN_ON_FAILURE( - clSetContextDestructorCallback(hContext->get(), ClCallback, Callback)); + CL_RETURN_ON_FAILURE(clSetContextDestructorCallback(hContext->CLContext, + ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 4b60ca5afe..a71f6adc05 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -16,7 +16,7 @@ struct ur_context_handle_t_ { using native_type = cl_context; - native_type Context; + native_type CLContext; std::vector Devices; uint32_t DeviceCount; std::atomic RefCount = 0; @@ -24,7 +24,7 @@ struct ur_context_handle_t_ { ur_context_handle_t_(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices) - : Context(Ctx), DeviceCount(DevCount) { + : CLContext(Ctx), DeviceCount(DevCount) { for (uint32_t i = 0; i < DeviceCount; i++) { Devices.emplace_back(phDevices[i]); urDeviceRetain(phDevices[i]); @@ -40,61 +40,13 @@ struct ur_context_handle_t_ { static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices, - ur_context_handle_t &Context) { - try { - uint32_t CLDeviceCount; - CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, - sizeof(CLDeviceCount), - &CLDeviceCount, nullptr)); - std::vector CLDevices(CLDeviceCount); - CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_DEVICES, - sizeof(CLDevices), CLDevices.data(), - nullptr)); - std::vector URDevices; - if (DevCount) { - if (DevCount != CLDeviceCount) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - for (uint32_t i = 0; i < DevCount; i++) { - if (phDevices[i]->get() != CLDevices[i]) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - URDevices.push_back(phDevices[i]); - } - } else { - DevCount = CLDeviceCount; - for (uint32_t i = 0; i < CLDeviceCount; i++) { - ur_device_handle_t UrDevice = nullptr; - ur_native_handle_t hNativeHandle = - reinterpret_cast(CLDevices[i]); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - hNativeHandle, nullptr, nullptr, &UrDevice)); - URDevices.push_back(UrDevice); - } - } - - auto URContext = std::make_unique(Ctx, DevCount, - URDevices.data()); - Context = URContext.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; - } - + ur_context_handle_t &Context); ~ur_context_handle_t_() { for (uint32_t i = 0; i < DeviceCount; i++) { urDeviceRelease(Devices[i]); } if (IsNativeHandleOwned) { - clReleaseContext(Context); + clReleaseContext(CLContext); } } - - native_type get() { return Context; } - - const std::vector &getDevices() { return Devices; } }; diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 893c17572b..dac0c9bdfa 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -46,8 +46,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, uint32_t AllDevicesNum = hPlatform->Devices.size(); uint32_t DeviceNumIter = 0; for (uint32_t i = 0; i < AllDevicesNum; i++) { - cl_device_type DeviceType = hPlatform->Devices[i]->Type; - if (DeviceType == Type || Type == CL_DEVICE_TYPE_ALL) { + cl_device_type DevTy = hPlatform->Devices[i]->Type; + if (DevTy == Type || Type == CL_DEVICE_TYPE_ALL) { if (phDevices) { phDevices[DeviceNumIter] = hPlatform->Devices[i].get(); } @@ -321,7 +321,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } cl_device_pci_bus_info_khr PciInfo = {}; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(PciInfo), &PciInfo, nullptr)); return ReturnValue(PciInfo.pci_device); @@ -338,11 +338,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); + clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); std::vector CLValue(NProperties); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, CLSize, CLValue.data(), nullptr)); /* The OpenCL implementation returns a value of 0 if no properties are @@ -365,7 +365,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, size_t CLSize; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, &CLSize)); + clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, nullptr, &CLSize)); const size_t NProperties = CLSize / sizeof(cl_device_partition_property); /* The OpenCL implementation returns either a size of 0 or a value of 0 if @@ -379,8 +379,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, auto CLValue = reinterpret_cast(alloca(CLSize)); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, CLSize, CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, CLSize, + CLValue, nullptr)); std::vector URValue(NProperties - 1); @@ -437,7 +437,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (DevVer >= oclv::V2_1) { cl_uint CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(cl_uint), &CLValue, nullptr)); @@ -471,7 +471,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } cl_device_fp_config CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, sizeof(cl_device_fp_config), &CLValue, nullptr)); @@ -493,7 +493,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* For OpenCL >=3.0, the query should be implemented */ cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); /* Mask operation to only consider atomic_memory_order* capabilities */ @@ -545,7 +545,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && @@ -597,7 +597,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_ORDER_RELAXED) && @@ -664,7 +664,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (DevVer >= oclv::V3_0) { cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); @@ -683,7 +683,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // not return an error if the query is unsuccessful as this is expected // of an OpenCL 1.2 driver. cl_device_atomic_capabilities CLCapabilities; - if (CL_SUCCESS == clGetDeviceInfo(hDevice->get(), + if (CL_SUCCESS == clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)) { @@ -715,7 +715,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &DevType, nullptr)); @@ -731,12 +731,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_ESIMD_SUPPORT: { bool Supported = false; cl_device_type DevType = CL_DEVICE_TYPE_DEFAULT; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &DevType, nullptr)); cl_uint VendorID = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_VENDOR_ID, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_VENDOR_ID, sizeof(VendorID), &VendorID, nullptr)); /* ESIMD is only supported by Intel GPUs. */ @@ -777,7 +777,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, cl_bitfield CLValue = 0; CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); + hDevice->CLDevice, CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); /* We can just static_cast the output because OpenCL and UR bitfields * map 1 to 1 for these properties. cl_bitfield is uint64_t and ur_flags_t @@ -796,8 +796,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, {"cl_intel_unified_shared_memory"}, Supported)); if (Supported) { cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo( - hDevice->get(), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_bitfield), &CLValue, + nullptr)); return ReturnValue(static_cast(CLValue)); } else { return ReturnValue(0); @@ -816,7 +817,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ cl_bool CLValue; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ @@ -885,8 +886,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * | ur_device_handle_t | cl_device_id | 8 | */ - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, propSize, - pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -897,8 +898,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, propSize, - pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -916,17 +917,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // Have to convert size_t to uint32_t size_t SubGroupSizesSize = 0; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, - &SubGroupSizesSize)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, 0, + nullptr, &SubGroupSizesSize)); std::vector SubGroupSizes(SubGroupSizesSize / sizeof(size_t)); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, SubGroupSizesSize, SubGroupSizes.data(), nullptr)); return ReturnValue.template operator()(SubGroupSizes.data(), SubGroupSizes.size()); } case UR_DEVICE_INFO_EXTENSIONS: { - cl_device_id Dev = hDevice->get(); + cl_device_id Dev = hDevice->CLDevice; size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -953,7 +954,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } static_assert(CL_UUID_SIZE_KHR == 16); std::array UUID{}; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_UUID_KHR, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_UUID_KHR, UUID.size(), UUID.data(), nullptr)); return ReturnValue(UUID); } @@ -994,7 +995,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: { - cl_device_id Dev = hDevice->get(); + cl_device_id Dev = hDevice->CLDevice; size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -1008,7 +1009,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { - cl_device_id Dev = hDevice->get(); + cl_device_id Dev = hDevice->CLDevice; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(Dev, UpdateCapabilities)); @@ -1064,8 +1065,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( CLProperties[CLProperties.size() - 1] = 0; cl_uint CLNumDevicesRet; - CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), - 0, nullptr, &CLNumDevicesRet)); + CL_RETURN_ON_FAILURE(clCreateSubDevices( + hDevice->CLDevice, CLProperties.data(), 0, nullptr, &CLNumDevicesRet)); if (pNumDevicesRet) { *pNumDevicesRet = CLNumDevicesRet; @@ -1075,9 +1076,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( * function shall only retrieve that number of sub-devices. */ if (phSubDevices) { std::vector CLSubDevices(CLNumDevicesRet); - CL_RETURN_ON_FAILURE(clCreateSubDevices(hDevice->get(), CLProperties.data(), - CLNumDevicesRet, - CLSubDevices.data(), nullptr)); + CL_RETURN_ON_FAILURE( + clCreateSubDevices(hDevice->CLDevice, CLProperties.data(), + CLNumDevicesRet, CLSubDevices.data(), nullptr)); for (uint32_t i = 0; i < std::min(CLNumDevicesRet, NumDevices); i++) { try { auto URSubDevice = std::make_unique( @@ -1125,21 +1126,21 @@ urDeviceRelease(ur_device_handle_t hDevice) { UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( ur_device_handle_t hDevice, ur_native_handle_t *phNativeDevice) { - *phNativeDevice = reinterpret_cast(hDevice->get()); + *phNativeDevice = reinterpret_cast(hDevice->CLDevice); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t hNativeDevice, ur_adapter_handle_t, + ur_native_handle_t hNativeDevice, ur_adapter_handle_t hAdapter, const ur_device_native_properties_t *pProperties, ur_device_handle_t *phDevice) { cl_device_id NativeHandle = reinterpret_cast(hNativeDevice); uint32_t NumPlatforms = 0; - UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + UR_RETURN_ON_FAILURE(urPlatformGet(&hAdapter, 1, 0, nullptr, &NumPlatforms)); std::vector Platforms(NumPlatforms); UR_RETURN_ON_FAILURE( - urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + urPlatformGet(&hAdapter, 1, NumPlatforms, Platforms.data(), nullptr)); for (uint32_t i = 0; i < NumPlatforms; i++) { uint32_t NumDevices = 0; @@ -1150,7 +1151,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( NumDevices, Devices.data(), nullptr)); for (auto &Device : Devices) { - if (Device->get() == NativeHandle) { + if (Device->CLDevice == NativeHandle) { *phDevice = Device; (*phDevice)->IsNativeHandleOwned = pProperties ? pProperties->isNativeHandleOwned : false; @@ -1165,7 +1166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { oclv::OpenCLVersion DevVer, PlatVer; - cl_device_id DeviceId = hDevice->get(); + cl_device_id DeviceId = hDevice->CLDevice; // TODO: Cache OpenCL version for each device and platform auto RetErr = hDevice->getDeviceVersion(DevVer); @@ -1211,7 +1212,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( // Get the type of the device cl_device_type DeviceType; constexpr uint32_t InvalidInd = std::numeric_limits::max(); - cl_int RetErr = clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + cl_int RetErr = clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &DeviceType, nullptr); if (RetErr != CL_SUCCESS) { *pSelectedBinary = InvalidInd; diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 8527f9b518..afd8a8b96d 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -14,7 +14,7 @@ struct ur_device_handle_t_ { using native_type = cl_device_id; - native_type Device; + native_type CLDevice; ur_platform_handle_t Platform; cl_device_type Type = 0; ur_device_handle_t ParentDevice = nullptr; @@ -23,19 +23,19 @@ struct ur_device_handle_t_ { ur_device_handle_t_(native_type Dev, ur_platform_handle_t Plat, ur_device_handle_t Parent) - : Device(Dev), Platform(Plat), ParentDevice(Parent) { + : CLDevice(Dev), Platform(Plat), ParentDevice(Parent) { RefCount = 1; if (Parent) { Type = Parent->Type; } else { - clGetDeviceInfo(Device, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, + clGetDeviceInfo(CLDevice, CL_DEVICE_TYPE, sizeof(cl_device_type), &Type, nullptr); } } ~ur_device_handle_t_() { if (ParentDevice && IsNativeHandleOwned) { - clReleaseDevice(Device); + clReleaseDevice(CLDevice); } } @@ -45,16 +45,14 @@ struct ur_device_handle_t_ { uint32_t getReferenceCount() const noexcept { return RefCount; } - native_type get() { return Device; } - ur_result_t getDeviceVersion(oclv::OpenCLVersion &Version) { size_t DevVerSize = 0; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Device, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); + clGetDeviceInfo(CLDevice, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); std::string DevVer(DevVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_VERSION, DevVerSize, - DevVer.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_VERSION, + DevVerSize, DevVer.data(), nullptr)); Version = oclv::OpenCLVersion(DevVer); if (!Version.isValid()) { @@ -67,9 +65,9 @@ struct ur_device_handle_t_ { bool isIntelFPGAEmuDevice() { size_t NameSize = 0; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Device, CL_DEVICE_NAME, 0, nullptr, &NameSize)); + clGetDeviceInfo(CLDevice, CL_DEVICE_NAME, 0, nullptr, &NameSize)); std::string NameStr(NameSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_NAME, NameSize, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_NAME, NameSize, NameStr.data(), nullptr)); return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; @@ -79,12 +77,12 @@ struct ur_device_handle_t_ { bool &Supported) { size_t ExtSize = 0; CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); + clGetDeviceInfo(CLDevice, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); std::string ExtStr(ExtSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_EXTENSIONS, ExtSize, - ExtStr.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(CLDevice, CL_DEVICE_EXTENSIONS, + ExtSize, ExtStr.data(), nullptr)); Supported = true; for (const std::string &Ext : Exts) { diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 02c451e249..b4e66babcd 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -39,10 +39,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE( - clEnqueueNDRangeKernel(hQueue->get(), hKernel->get(), workDim, + clEnqueueNDRangeKernel(hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { @@ -75,10 +75,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( - hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -99,10 +99,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueBarrierWithWaitList( - hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -124,10 +124,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( - hQueue->get(), hBuffer->get(), blockingRead, offset, size, pDst, + hQueue->CLQueue, hBuffer->CLMemory, blockingRead, offset, size, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -150,10 +150,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( - hQueue->get(), hBuffer->get(), blockingWrite, offset, size, pSrc, + hQueue->CLQueue, hBuffer->CLMemory, blockingWrite, offset, size, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -183,12 +183,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( - hQueue->get(), hBuffer->get(), blockingRead, BufferOrigin, HostOrigin, - Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, hBuffer->CLMemory, blockingRead, BufferOrigin, + HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, + hostSlicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -217,12 +217,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( - hQueue->get(), hBuffer->get(), blockingWrite, BufferOrigin, HostOrigin, - Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, hBuffer->CLMemory, blockingWrite, BufferOrigin, + HostOrigin, Region, bufferRowPitch, bufferSlicePitch, hostRowPitch, + hostSlicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -245,11 +245,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( - hQueue->get(), hBufferSrc->get(), hBufferDst->get(), srcOffset, dstOffset, - size, numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, hBufferSrc->CLMemory, hBufferDst->CLMemory, srcOffset, + dstOffset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -277,11 +277,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( - hQueue->get(), hBufferSrc->get(), hBufferDst->get(), SrcOrigin, DstOrigin, - Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + hQueue->CLQueue, hBufferSrc->CLMemory, hBufferDst->CLMemory, SrcOrigin, + DstOrigin, Region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -308,10 +308,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( - hQueue->get(), hBuffer->get(), pPattern, patternSize, offset, size, + hQueue->CLQueue, hBuffer->CLMemory, pPattern, patternSize, offset, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -337,10 +337,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( cl_event WriteEvent = nullptr; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } auto ClErr = clEnqueueWriteBuffer( - hQueue->get(), hBuffer->get(), false, offset, size, HostBuffer, + hQueue->CLQueue, hBuffer->CLMemory, false, offset, size, HostBuffer, numEventsInWaitList, CLWaitEvents.data(), &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; @@ -388,10 +388,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueReadImage( - hQueue->get(), hImage->get(), blockingRead, Origin, Region, rowPitch, + hQueue->CLQueue, hImage->CLMemory, blockingRead, Origin, Region, rowPitch, slicePitch, pDst, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -417,11 +417,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - CL_RETURN_ON_FAILURE(clEnqueueWriteImage( - hQueue->get(), hImage->get(), blockingWrite, Origin, Region, rowPitch, - slicePitch, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event)); + CL_RETURN_ON_FAILURE( + clEnqueueWriteImage(hQueue->CLQueue, hImage->CLMemory, blockingWrite, + Origin, Region, rowPitch, slicePitch, pSrc, + numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -448,11 +449,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueCopyImage( - hQueue->get(), hImageSrc->get(), hImageDst->get(), SrcOrigin, DstOrigin, - Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, hImageSrc->CLMemory, hImageDst->CLMemory, SrcOrigin, + DstOrigin, Region, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -475,13 +476,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } cl_int Err; - *ppRetMap = clEnqueueMapBuffer(hQueue->get(), hBuffer->get(), blockingMap, - convertURMapFlagsToCL(mapFlags), offset, size, - numEventsInWaitList, CLWaitEvents.data(), - &Event, &Err); + *ppRetMap = clEnqueueMapBuffer(hQueue->CLQueue, hBuffer->CLMemory, + blockingMap, convertURMapFlagsToCL(mapFlags), + offset, size, numEventsInWaitList, + CLWaitEvents.data(), &Event, &Err); if (phEvent) { try { auto UREvent = @@ -503,9 +504,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(hQueue->get(), hMem->get(), + CL_RETURN_ON_FAILURE(clEnqueueUnmapMemObject(hQueue->CLQueue, hMem->CLMemory, pMappedPtr, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { @@ -528,11 +529,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = hQueue->Context->get(); + cl_context Ctx = hQueue->Context->CLContext; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -540,8 +541,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( cl_ext::EnqueueWriteGlobalVariableName, &F)); cl_int Res = - F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, - pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); + F(hQueue->CLQueue, hProgram->CLProgram, name, blockingWrite, count, + offset, pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { try { auto UREvent = @@ -562,11 +563,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context Ctx = hQueue->Context->get(); + cl_context Ctx = hQueue->Context->CLContext; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -574,8 +575,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( cl_ext::EnqueueReadGlobalVariableName, &F)); cl_int Res = - F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, - numEventsInWaitList, CLWaitEvents.data(), &Event); + F(hQueue->CLQueue, hProgram->CLProgram, name, blockingRead, count, offset, + pDst, numEventsInWaitList, CLWaitEvents.data(), &Event); if (phEvent) { try { @@ -597,11 +598,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); + cl_context CLContext = hQueue->Context->CLContext; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( @@ -610,9 +611,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_ext::EnqueueReadHostPipeName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, - blocking, pDst, size, numEventsInWaitList, - CLWaitEvents.data(), &Event)); + CL_RETURN_ON_FAILURE( + FuncPtr(hQueue->CLQueue, hProgram->CLProgram, pipe_symbol, blocking, + pDst, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { @@ -636,11 +637,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); + cl_context CLContext = hQueue->Context->CLContext; cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( @@ -649,9 +650,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( cl_ext::EnqueueWriteHostPipeName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, - blocking, pSrc, size, numEventsInWaitList, - CLWaitEvents.data(), &Event)); + CL_RETURN_ON_FAILURE( + FuncPtr(hQueue->CLQueue, hProgram->CLProgram, pipe_symbol, blocking, + pSrc, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = std::make_unique( diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index b432959a75..5fef1803bc 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -132,7 +132,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { - return getNativeHandle(hEvent->get(), phNativeEvent); + return getNativeHandle(hEvent->CLEvent, phNativeEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { @@ -151,7 +151,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { std::vector CLEvents(numEvents); for (uint32_t i = 0; i < numEvents; i++) { - CLEvents[i] = phEventWaitList[i]->get(); + CLEvents[i] = phEventWaitList[i]->CLEvent; } cl_int RetErr = clWaitForEvents(numEvents, CLEvents.data()); CL_RETURN_ON_FAILURE(RetErr); @@ -178,7 +178,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, } default: { size_t CheckPropSize = 0; - cl_int RetErr = clGetEventInfo(hEvent->get(), CLEventInfo, propSize, + cl_int RetErr = clGetEventInfo(hEvent->CLEvent, CLEventInfo, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; @@ -213,7 +213,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { cl_profiling_info CLProfilingInfo = convertURProfilingInfoToCL(propName); - cl_int RetErr = clGetEventProfilingInfo(hEvent->get(), CLProfilingInfo, + cl_int RetErr = clGetEventProfilingInfo(hEvent->CLEvent, CLProfilingInfo, propSize, pPropValue, pPropSizeRet); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; @@ -280,7 +280,7 @@ urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, C->execute(); }; CL_RETURN_ON_FAILURE( - clSetEventCallback(hEvent->get(), CallbackType, ClCallback, Callback)); + clSetEventCallback(hEvent->CLEvent, CallbackType, ClCallback, Callback)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.hpp b/source/adapters/opencl/event.hpp index 18d7c83f6d..a323685818 100644 --- a/source/adapters/opencl/event.hpp +++ b/source/adapters/opencl/event.hpp @@ -15,7 +15,7 @@ struct ur_event_handle_t_ { using native_type = cl_event; - native_type Event; + native_type CLEvent; ur_context_handle_t Context; ur_queue_handle_t Queue; std::atomic RefCount = 0; @@ -23,7 +23,7 @@ struct ur_event_handle_t_ { ur_event_handle_t_(native_type Event, ur_context_handle_t Ctx, ur_queue_handle_t Queue) - : Event(Event), Context(Ctx), Queue(Queue) { + : CLEvent(Event), Context(Ctx), Queue(Queue) { RefCount = 1; urContextRetain(Context); if (Queue) { @@ -37,7 +37,7 @@ struct ur_event_handle_t_ { urQueueRelease(Queue); } if (IsNativeHandleOwned) { - clReleaseEvent(Event); + clReleaseEvent(CLEvent); } } @@ -46,6 +46,4 @@ struct ur_event_handle_t_ { uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } - - native_type get() { return Event; } }; diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 92ccc0a995..c56d356c1e 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -19,12 +19,53 @@ #include #include +ur_result_t ur_kernel_handle_t_::makeWithNative(native_type NativeKernel, + ur_program_handle_t Program, + ur_context_handle_t Context, + ur_kernel_handle_t &Kernel) { + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, + sizeof(CLProgram), &CLProgram, + nullptr)); + + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Program) { + if (Program->CLProgram != CLProgram) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } + } else { + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLProgram); + UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( + hNativeHandle, Context, nullptr, &Program)); + } + + auto URKernel = + std::make_unique(NativeKernel, Program, Context); + Kernel = URKernel.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { try { cl_int CLResult; - cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); + cl_kernel Kernel = + clCreateKernel(hProgram->CLProgram, pKernelName, &CLResult); CL_RETURN_ON_FAILURE(CLResult); auto URKernel = std::make_unique(Kernel, hProgram, hProgram->Context); @@ -43,7 +84,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( const ur_kernel_arg_value_properties_t *, const void *pArgValue) { CL_RETURN_ON_FAILURE(clSetKernelArg( - hKernel->get(), static_cast(argIndex), argSize, pArgValue)); + hKernel->CLKernel, static_cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -53,7 +94,7 @@ urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { CL_RETURN_ON_FAILURE(clSetKernelArg( - hKernel->get(), static_cast(argIndex), argSize, nullptr)); + hKernel->CLKernel, static_cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -105,8 +146,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, default: { size_t CheckPropSize = 0; cl_int ClResult = - clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); + clGetKernelInfo(hKernel->CLKernel, mapURKernelInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -151,7 +192,7 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, // to deter naive use of the query. if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { cl_device_type ClDeviceType; - CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_TYPE, + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CL_DEVICE_TYPE, sizeof(ClDeviceType), &ClDeviceType, nullptr)); if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { @@ -163,7 +204,7 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - hKernel->get(), hDevice->get(), mapURKernelGroupInfoToCL(propName), + hKernel->CLKernel, hDevice->CLDevice, mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; @@ -216,9 +257,10 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, InputValueSize = MaxDims * sizeof(size_t); } - cl_int Ret = clGetKernelSubGroupInfo( - hKernel->get(), hDevice->get(), mapURKernelSubGroupInfoToCL(propName), - InputValueSize, InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); + cl_int Ret = clGetKernelSubGroupInfo(hKernel->CLKernel, hDevice->CLDevice, + mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), + sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does @@ -288,7 +330,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. */ - CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT, sizeof(cl_context), &CLContext, nullptr)); @@ -298,7 +340,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { if (HFunc) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); } @@ -308,7 +350,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { if (DFunc) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); } @@ -318,7 +360,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { if (SFunc) { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + hKernel->CLKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, sizeof(cl_bool), &TrueVal)); } return UR_RESULT_SUCCESS; @@ -341,7 +383,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { - CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->get(), + CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->CLKernel, CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); return UR_RESULT_SUCCESS; @@ -357,7 +399,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT, sizeof(cl_context), &CLContext, nullptr)); @@ -370,7 +412,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( if (FuncPtr) { CL_RETURN_ON_FAILURE( - FuncPtr(hKernel->get(), static_cast(argIndex), pArgValue)); + FuncPtr(hKernel->CLKernel, static_cast(argIndex), pArgValue)); } return UR_RESULT_SUCCESS; @@ -378,7 +420,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - *phNativeKernel = reinterpret_cast(hKernel->get()); + *phNativeKernel = reinterpret_cast(hKernel->CLKernel); return UR_RESULT_SUCCESS; } @@ -409,8 +451,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { - cl_mem CLArgValue = hArgValue ? hArgValue->get() : nullptr; - CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->get(), + cl_mem CLArgValue = hArgValue ? hArgValue->CLMemory : nullptr; + CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel, static_cast(argIndex), sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; @@ -420,9 +462,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { - cl_sampler CLArgSampler = hArgValue->get(); - cl_int RetErr = clSetKernelArg(hKernel->get(), static_cast(argIndex), - sizeof(CLArgSampler), &CLArgSampler); + cl_sampler CLArgSampler = hArgValue->CLSampler; + cl_int RetErr = + clSetKernelArg(hKernel->CLKernel, static_cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -434,8 +477,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( cl_device_id Device; cl_platform_id Platform; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - hQueue->get(), CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, nullptr)); + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(hQueue->CLQueue, CL_QUEUE_DEVICE, + sizeof(cl_device_id), &Device, + nullptr)); CL_RETURN_ON_FAILURE(clGetDeviceInfo( Device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &Platform, nullptr)); @@ -448,7 +492,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; CL_RETURN_ON_FAILURE(GetKernelSuggestedLocalWorkSizeFuncPtr( - hQueue->get(), hKernel->get(), workDim, pGlobalWorkOffset, + hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pSuggestedLocalWorkSize)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp index b103af03c2..a1cb5c317e 100644 --- a/source/adapters/opencl/kernel.hpp +++ b/source/adapters/opencl/kernel.hpp @@ -17,7 +17,7 @@ struct ur_kernel_handle_t_ { using native_type = cl_kernel; - native_type Kernel; + native_type CLKernel; ur_program_handle_t Program; ur_context_handle_t Context; std::atomic RefCount = 0; @@ -25,7 +25,7 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, ur_context_handle_t Context) - : Kernel(Kernel), Program(Program), Context(Context) { + : CLKernel(Kernel), Program(Program), Context(Context) { RefCount = 1; if (Program) { urProgramRetain(Program); @@ -39,7 +39,7 @@ struct ur_kernel_handle_t_ { } urContextRelease(Context); if (IsNativeHandleOwned) { - clReleaseKernel(Kernel); + clReleaseKernel(CLKernel); } } @@ -52,42 +52,5 @@ struct ur_kernel_handle_t_ { static ur_result_t makeWithNative(native_type NativeKernel, ur_program_handle_t Program, ur_context_handle_t Context, - ur_kernel_handle_t &Kernel) { - try { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - cl_program CLProgram; - CL_RETURN_ON_FAILURE(clGetKernelInfo(NativeKernel, CL_KERNEL_PROGRAM, - sizeof(CLProgram), &CLProgram, - nullptr)); - - if (Context->get() != CLContext) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - if (Program) { - if (Program->get() != CLProgram) { - return UR_RESULT_ERROR_INVALID_PROGRAM; - } - } else { - ur_native_handle_t hNativeHandle = - reinterpret_cast(CLProgram); - UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( - hNativeHandle, Context, nullptr, &Program)); - } - - auto URKernel = - std::make_unique(NativeKernel, Program, Context); - Kernel = URKernel.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; - } - - native_type get() { return Kernel; } + ur_kernel_handle_t &Kernel); }; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 4d41a9463f..4e49ac9f8e 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -216,6 +216,31 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { return CLFlags; } +ur_result_t ur_mem_handle_t_::makeWithNative(native_type NativeMem, + ur_context_handle_t Ctx, + ur_mem_handle_t &Mem) { + if (!Ctx) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo( + NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + + if (Ctx->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URMem = std::make_unique(NativeMem, Ctx); + Mem = URMem.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { @@ -224,7 +249,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; - cl_context CLContext = hContext->get(); + cl_context CLContext = hContext->CLContext; // First we need to look up the function pointer RetErr = cl_ext::getExtFuncFromContext( @@ -274,8 +299,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( void *HostPtr = pProperties ? pProperties->pHost : nullptr; try { cl_mem Buffer = - clCreateBuffer(hContext->get(), static_cast(flags), size, - HostPtr, static_cast(&RetErr)); + clCreateBuffer(hContext->CLContext, static_cast(flags), + size, HostPtr, static_cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Buffer, hContext); *phBuffer = URMem.release(); @@ -301,7 +326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( try { cl_mem Mem = - clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, + clCreateImage(hContext->CLContext, MapFlags, &ImageFormat, &ImageDesc, pHost, static_cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Mem, hContext); @@ -336,11 +361,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( BufferRegion.size = pRegion->size; try { cl_mem Buffer = clCreateSubBuffer( - hBuffer->get(), static_cast(flags), BufferCreateType, + hBuffer->CLMemory, static_cast(flags), BufferCreateType, &BufferRegion, static_cast(&RetErr)); if (RetErr == CL_INVALID_VALUE) { size_t BufferSize = 0; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->get(), CL_MEM_SIZE, + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->CLMemory, CL_MEM_SIZE, sizeof(BufferSize), &BufferSize, nullptr)); if (BufferRegion.size + BufferRegion.origin > BufferSize) @@ -359,7 +384,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( ur_mem_handle_t hMem, ur_device_handle_t, ur_native_handle_t *phNativeMem) { - return getNativeHandle(hMem->get(), phNativeMem); + return getNativeHandle(hMem->CLMemory, phNativeMem); } UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( @@ -401,7 +426,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, } default: { size_t CheckPropSize = 0; - auto ClResult = clGetMemObjectInfo(hMemory->get(), CLPropName, propSize, + auto ClResult = clGetMemObjectInfo(hMemory->CLMemory, CLPropName, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; @@ -426,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, const cl_int CLPropName = mapURMemImageInfoToCL(propName); size_t CheckPropSize = 0; - auto ClResult = clGetImageInfo(hMemory->get(), CLPropName, propSize, + auto ClResult = clGetImageInfo(hMemory->CLMemory, CLPropName, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; diff --git a/source/adapters/opencl/memory.hpp b/source/adapters/opencl/memory.hpp index f54d9b1347..1aa1b16d4e 100644 --- a/source/adapters/opencl/memory.hpp +++ b/source/adapters/opencl/memory.hpp @@ -16,13 +16,13 @@ struct ur_mem_handle_t_ { using native_type = cl_mem; - native_type Memory; + native_type CLMemory; ur_context_handle_t Context; std::atomic RefCount = 0; bool IsNativeHandleOwned = true; ur_mem_handle_t_(native_type Mem, ur_context_handle_t Ctx) - : Memory(Mem), Context(Ctx) { + : CLMemory(Mem), Context(Ctx) { RefCount = 1; urContextRetain(Context); } @@ -30,7 +30,7 @@ struct ur_mem_handle_t_ { ~ur_mem_handle_t_() { urContextRelease(Context); if (IsNativeHandleOwned) { - clReleaseMemObject(Memory); + clReleaseMemObject(CLMemory); } } @@ -42,28 +42,5 @@ struct ur_mem_handle_t_ { static ur_result_t makeWithNative(native_type NativeMem, ur_context_handle_t Ctx, - ur_mem_handle_t &Mem) { - if (!Ctx) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } - try { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetMemObjectInfo( - NativeMem, CL_MEM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); - - if (Ctx->get() != CLContext) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - auto URMem = std::make_unique(NativeMem, Ctx); - Mem = URMem.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; - } - - native_type get() { return Memory; } + ur_mem_handle_t &Mem); }; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 526246f49a..9a476ac2b9 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "platform.hpp" +#include "adapter.hpp" static cl_int mapURPlatformInfoToCL(ur_platform_info_t URPropName) { @@ -43,7 +44,7 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { - cl_platform_id Plat = hPlatform->get(); + cl_platform_id Plat = hPlatform->CLPlatform; CL_RETURN_ON_FAILURE( clGetPlatformInfo(Plat, CLPropName, propSize, pPropValue, pSizeRet)); @@ -62,81 +63,78 @@ urPlatformGetApiVersion([[maybe_unused]] ur_platform_handle_t hPlatform, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, - ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - - static std::vector> URPlatforms; - static std::once_flag InitFlag; - static uint32_t NumPlatforms = 0; - cl_int Result = CL_SUCCESS; - - std::call_once( - InitFlag, - [](cl_int &Res) { - Res = clGetPlatformIDs(0, nullptr, &NumPlatforms); - if (Res != CL_SUCCESS) { - return Res; - } - std::vector CLPlatforms(NumPlatforms); - Res = clGetPlatformIDs(static_cast(NumPlatforms), - CLPlatforms.data(), nullptr); - if (Res != CL_SUCCESS) { - return Res; +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( + ur_adapter_handle_t *phAdapters, uint32_t NumAdapters, uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { + for (uint32_t idx = 0; idx < NumAdapters; idx++) { + if (!(phAdapters[idx]->NumPlatforms)) { + uint32_t NumPlatforms = 0; + cl_int Res = clGetPlatformIDs(0, nullptr, &NumPlatforms); + + std::vector CLPlatforms(NumPlatforms); + Res = clGetPlatformIDs(static_cast(NumPlatforms), + CLPlatforms.data(), nullptr); + + /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms + */ + if (Res == CL_PLATFORM_NOT_FOUND_KHR) { + if (pNumPlatforms) { + *pNumPlatforms = 0; + return UR_RESULT_SUCCESS; } - try { - for (uint32_t i = 0; i < NumPlatforms; i++) { - auto URPlatform = - std::make_unique(CLPlatforms[i]); - URPlatforms.emplace_back(URPlatform.release()); - } - } catch (std::bad_alloc &) { - return CL_OUT_OF_RESOURCES; - } catch (...) { - return CL_INVALID_PLATFORM; + } + CL_RETURN_ON_FAILURE(Res); + try { + for (uint32_t i = 0; i < NumPlatforms; i++) { + auto URPlatform = + std::make_unique(CLPlatforms[i]); + phAdapters[idx]->URPlatforms.emplace_back(URPlatform.release()); } - return Res; - }, - Result); - - /* Absorb the CL_PLATFORM_NOT_FOUND_KHR and just return 0 in num_platforms */ - if (Result == CL_PLATFORM_NOT_FOUND_KHR) { - Result = CL_SUCCESS; - if (pNumPlatforms) { - *pNumPlatforms = 0; + phAdapters[idx]->NumPlatforms = NumPlatforms; + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_INVALID_PLATFORM; + } } } - if (pNumPlatforms != nullptr) { - *pNumPlatforms = NumPlatforms; + if (pNumPlatforms) { + *pNumPlatforms = 0; } - if (NumEntries && phPlatforms) { - for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = &(*URPlatforms[i]); + for (uint32_t idx = 0; idx < NumAdapters; idx++) { + if (pNumPlatforms != nullptr) { + *pNumPlatforms += phAdapters[idx]->NumPlatforms; + } + if (NumEntries && phPlatforms) { + for (uint32_t i = 0; i < NumEntries; i++) { + phPlatforms[i] = phAdapters[idx]->URPlatforms[i].get(); + } } } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( ur_platform_handle_t hPlatform, ur_native_handle_t *phNativePlatform) { - *phNativePlatform = reinterpret_cast(hPlatform->get()); + *phNativePlatform = + reinterpret_cast(hPlatform->CLPlatform); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( - ur_native_handle_t hNativePlatform, ur_adapter_handle_t, + ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter, const ur_platform_native_properties_t *, ur_platform_handle_t *phPlatform) { cl_platform_id NativeHandle = reinterpret_cast(hNativePlatform); uint32_t NumPlatforms = 0; - UR_RETURN_ON_FAILURE(urPlatformGet(nullptr, 0, 0, nullptr, &NumPlatforms)); + UR_RETURN_ON_FAILURE(urPlatformGet(&hAdapter, 1, 0, nullptr, &NumPlatforms)); std::vector Platforms(NumPlatforms); UR_RETURN_ON_FAILURE( - urPlatformGet(nullptr, 0, NumPlatforms, Platforms.data(), nullptr)); + urPlatformGet(&hAdapter, 1, NumPlatforms, Platforms.data(), nullptr)); for (uint32_t i = 0; i < NumPlatforms; i++) { - if (Platforms[i]->get() == NativeHandle) { + if (Platforms[i]->CLPlatform == NativeHandle) { *phPlatform = Platforms[i]; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/platform.hpp b/source/adapters/opencl/platform.hpp index 6a9c49eb37..85699ded95 100644 --- a/source/adapters/opencl/platform.hpp +++ b/source/adapters/opencl/platform.hpp @@ -16,10 +16,10 @@ struct ur_platform_handle_t_ { using native_type = cl_platform_id; - native_type Platform = nullptr; + native_type CLPlatform = nullptr; std::vector> Devices; - ur_platform_handle_t_(native_type Plat) : Platform(Plat) {} + ur_platform_handle_t_(native_type Plat) : CLPlatform(Plat) {} ~ur_platform_handle_t_() { for (auto &Dev : Devices) { @@ -33,7 +33,7 @@ struct ur_platform_handle_t_ { if (!CachedExtFunc) { // TODO: check that the function is available CachedExtFunc = reinterpret_cast( - clGetExtensionFunctionAddressForPlatform(Platform, FuncName)); + clGetExtensionFunctionAddressForPlatform(CLPlatform, FuncName)); if (!CachedExtFunc) { return UR_RESULT_ERROR_INVALID_VALUE; } @@ -42,17 +42,16 @@ struct ur_platform_handle_t_ { return UR_RESULT_SUCCESS; } - native_type get() { return Platform; } - ur_result_t InitDevices() { if (Devices.empty()) { cl_uint DeviceNum = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceIDs(Platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &DeviceNum)); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(CLPlatform, CL_DEVICE_TYPE_ALL, 0, + nullptr, &DeviceNum)); std::vector CLDevices(DeviceNum); - CL_RETURN_ON_FAILURE(clGetDeviceIDs( - Platform, CL_DEVICE_TYPE_ALL, DeviceNum, CLDevices.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceIDs(CLPlatform, CL_DEVICE_TYPE_ALL, + DeviceNum, CLDevices.data(), + nullptr)); try { Devices.resize(DeviceNum); @@ -72,12 +71,12 @@ struct ur_platform_handle_t_ { ur_result_t getPlatformVersion(oclv::OpenCLVersion &Version) { size_t PlatVerSize = 0; - CL_RETURN_ON_FAILURE(clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, + CL_RETURN_ON_FAILURE(clGetPlatformInfo(CLPlatform, CL_PLATFORM_VERSION, 0, nullptr, &PlatVerSize)); std::string PlatVer(PlatVerSize, '\0'); CL_RETURN_ON_FAILURE(clGetPlatformInfo( - Platform, CL_PLATFORM_VERSION, PlatVerSize, PlatVer.data(), nullptr)); + CLPlatform, CL_PLATFORM_VERSION, PlatVerSize, PlatVer.data(), nullptr)); Version = oclv::OpenCLVersion(PlatVer); if (!Version.isValid()) { diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 2703c319c9..8c57c3a2fd 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -16,6 +16,32 @@ #include +ur_result_t ur_program_handle_t_::makeWithNative(native_type NativeProg, + ur_context_handle_t Context, + ur_program_handle_t &Program) { + if (!Context) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, + sizeof(CLContext), &CLContext, + nullptr)); + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + auto URProgram = + std::make_unique(NativeProg, Context); + Program = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { @@ -33,7 +59,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ - for (ur_device_handle_t URDev : hContext->getDevices()) { + for (ur_device_handle_t URDev : hContext->Devices) { oclv::OpenCLVersion DevVer; CL_RETURN_ON_FAILURE_AND_SET_NULL(URDev->getDeviceVersion(DevVer), @@ -55,7 +81,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } cl_program Program = - clCreateProgramWithIL(hContext->get(), pIL, length, &Err); + clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); try { auto URProgram = @@ -70,7 +96,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. */ - for (ur_device_handle_t URDev : hContext->getDevices()) { + for (ur_device_handle_t URDev : hContext->Devices) { bool Supported = false; CL_RETURN_ON_FAILURE_AND_SET_NULL( URDev->checkDeviceExtensions({"cl_khr_il_program"}, Supported), @@ -85,11 +111,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( cl_program(CL_API_CALL *)(cl_context, const void *, size_t, cl_int *); ApiFuncT FuncPtr = reinterpret_cast(clGetExtensionFunctionAddressForPlatform( - CurPlatform->get(), "clCreateProgramWithILKHR")); + CurPlatform->CLPlatform, "clCreateProgramWithILKHR")); assert(FuncPtr != nullptr); try { - cl_program Program = FuncPtr(hContext->get(), pIL, length, &Err); + cl_program Program = FuncPtr(hContext->CLContext, pIL, length, &Err); CL_RETURN_ON_FAILURE(Err); auto URProgram = std::make_unique(Program, hContext); @@ -110,14 +136,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( const uint8_t *pBinary, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - const cl_device_id Devices[1] = {hDevice->get()}; + const cl_device_id Devices[1] = {hDevice->CLDevice}; const size_t Lengths[1] = {size}; cl_int BinaryStatus[1]; cl_int CLResult; try { cl_program Program = clCreateProgramWithBinary( - hContext->get(), static_cast(1u), Devices, Lengths, &pBinary, - BinaryStatus, &CLResult); + hContext->CLContext, static_cast(1u), Devices, Lengths, + &pBinary, BinaryStatus, &CLResult); CL_RETURN_ON_FAILURE(CLResult); auto URProgram = std::make_unique(Program, hContext); *phProgram = URProgram.release(); @@ -139,10 +165,10 @@ urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, uint32_t DeviceCount = hProgram->Context->DeviceCount; std::vector CLDevicesInProgram(DeviceCount); for (uint32_t i = 0; i < DeviceCount; i++) { - CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice; } - CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), DeviceCount, + CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->CLProgram, DeviceCount, CLDevicesInProgram.data(), pOptions, 0, nullptr, nullptr, nullptr, nullptr)); @@ -202,7 +228,7 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, } default: { size_t CheckPropSize = 0; - auto ClResult = clGetProgramInfo(hProgram->get(), CLPropName, propSize, + auto ClResult = clGetProgramInfo(hProgram->CLProgram, CLPropName, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; @@ -224,11 +250,11 @@ urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, uint32_t DeviceCount = hProgram->Context->DeviceCount; std::vector CLDevicesInProgram(DeviceCount); for (uint32_t i = 0; i < DeviceCount; i++) { - CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice; } CL_RETURN_ON_FAILURE( - clBuildProgram(hProgram->get(), CLDevicesInProgram.size(), + clBuildProgram(hProgram->CLProgram, CLDevicesInProgram.size(), CLDevicesInProgram.data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -241,10 +267,10 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, cl_int CLResult; std::vector CLPrograms(count); for (uint32_t i = 0; i < count; i++) { - CLPrograms[i] = phPrograms[i]->get(); + CLPrograms[i] = phPrograms[i]->CLProgram; } cl_program Program = clLinkProgram( - hContext->get(), 0, nullptr, pOptions, static_cast(count), + hContext->CLContext, 0, nullptr, pOptions, static_cast(count), CLPrograms.data(), nullptr, nullptr, &CLResult); if (CL_INVALID_BINARY == CLResult) { @@ -335,12 +361,13 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( - hProgram->get(), hDevice->get(), mapURProgramBuildInfoToCL(propName), - sizeof(cl_program_binary_type), &BinaryType, nullptr)); + hProgram->CLProgram, hDevice->CLDevice, + mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), + &BinaryType, nullptr)); return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } size_t CheckPropSize = 0; - cl_int ClErr = clGetProgramBuildInfo(hProgram->get(), hDevice->get(), + cl_int ClErr = clGetProgramBuildInfo(hProgram->CLProgram, hDevice->CLDevice, mapURProgramBuildInfoToCL(propName), propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { @@ -371,7 +398,7 @@ urProgramRelease(ur_program_handle_t hProgram) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( ur_program_handle_t hProgram, ur_native_handle_t *phNativeProgram) { - *phNativeProgram = reinterpret_cast(hProgram->get()); + *phNativeProgram = reinterpret_cast(hProgram->CLProgram); return UR_RESULT_SUCCESS; } @@ -392,7 +419,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( ur_program_handle_t hProgram, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { - cl_program CLProg = hProgram->get(); + cl_program CLProg = hProgram->CLProgram; if (!hProgram->Context) { return UR_RESULT_ERROR_INVALID_PROGRAM; } @@ -410,7 +437,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( if (PlatVer < oclv::V2_2) { UseExtensionLookup = true; } else { - for (ur_device_handle_t Dev : Ctx->getDevices()) { + for (ur_device_handle_t Dev : Ctx->Devices) { oclv::OpenCLVersion DevVer; UR_RETURN_ON_FAILURE(Dev->getDeviceVersion(DevVer)); @@ -433,7 +460,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( SetProgramSpecializationConstant = nullptr; const ur_result_t URResult = cl_ext::getExtFuncFromContext< decltype(SetProgramSpecializationConstant)>( - Ctx->get(), + Ctx->CLContext, cl_ext::ExtFuncPtrCache->clSetProgramSpecializationConstantCache, cl_ext::SetProgramSpecializationConstantName, &SetProgramSpecializationConstant); @@ -481,7 +508,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( ur_device_handle_t hDevice, ur_program_handle_t hProgram, const char *pFunctionName, void **ppFunctionPointer) { - cl_context CLContext = hProgram->Context->get(); + cl_context CLContext = hProgram->Context->CLContext; cl_ext::clGetDeviceFunctionPointer_fn FuncT = nullptr; @@ -498,12 +525,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( *ppFunctionPointer = 0; size_t Size; CL_RETURN_ON_FAILURE(clGetProgramInfo( - hProgram->get(), CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); + hProgram->CLProgram, CL_PROGRAM_KERNEL_NAMES, 0, nullptr, &Size)); std::string KernelNames(Size, ' '); CL_RETURN_ON_FAILURE( - clGetProgramInfo(hProgram->get(), CL_PROGRAM_KERNEL_NAMES, + clGetProgramInfo(hProgram->CLProgram, CL_PROGRAM_KERNEL_NAMES, KernelNames.size(), &KernelNames[0], nullptr)); // Get rid of the null terminator and search for the kernel name. If the @@ -514,7 +541,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( } const cl_int CLResult = - FuncT(hDevice->get(), hProgram->get(), pFunctionName, + FuncT(hDevice->CLDevice, hProgram->CLProgram, pFunctionName, reinterpret_cast(ppFunctionPointer)); // GPU runtime sometimes returns CL_INVALID_ARG_VALUE if the function address // cannot be found but the kernel exists. As the kernel does exist, return @@ -535,7 +562,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( void **ppGlobalVariablePointerRet) { cl_context CLContext = nullptr; - CL_RETURN_ON_FAILURE(clGetProgramInfo(hProgram->get(), CL_PROGRAM_CONTEXT, + CL_RETURN_ON_FAILURE(clGetProgramInfo(hProgram->CLProgram, CL_PROGRAM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); @@ -547,7 +574,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( cl_ext::GetDeviceGlobalVariablePointerName, &FuncT)); const cl_int CLResult = - FuncT(hDevice->get(), hProgram->get(), pGlobalVariableName, + FuncT(hDevice->CLDevice, hProgram->CLProgram, pGlobalVariableName, pGlobalVariableSizeRet, ppGlobalVariablePointerRet); if (CLResult != CL_SUCCESS) { diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index f2c065d895..b97a2feb0f 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -16,13 +16,13 @@ struct ur_program_handle_t_ { using native_type = cl_program; - native_type Program; + native_type CLProgram; ur_context_handle_t Context; std::atomic RefCount = 0; bool IsNativeHandleOwned = true; ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) - : Program(Prog), Context(Ctx) { + : CLProgram(Prog), Context(Ctx) { RefCount = 1; urContextRetain(Context); } @@ -30,7 +30,7 @@ struct ur_program_handle_t_ { ~ur_program_handle_t_() { urContextRelease(Context); if (IsNativeHandleOwned) { - clReleaseProgram(Program); + clReleaseProgram(CLProgram); } } @@ -42,29 +42,5 @@ struct ur_program_handle_t_ { static ur_result_t makeWithNative(native_type NativeProg, ur_context_handle_t Context, - ur_program_handle_t &Program) { - if (!Context) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } - try { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetProgramInfo(NativeProg, CL_PROGRAM_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - if (Context->get() != CLContext) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - auto URProgram = - std::make_unique(NativeProg, Context); - Program = URProgram.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - - return UR_RESULT_SUCCESS; - } - - native_type get() { return Program; } + ur_program_handle_t &Program); }; diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 68f59d4563..80353ed7f5 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -70,6 +70,41 @@ mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { return Flags; } +ur_result_t ur_queue_handle_t_::makeWithNative(native_type NativeQueue, + ur_context_handle_t Context, + ur_device_handle_t Device, + ur_queue_handle_t &Queue) { + try { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + cl_device_id CLDevice; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); + if (Context->CLContext != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + if (Device) { + if (Device->CLDevice != CLDevice) { + return UR_RESULT_ERROR_INVALID_DEVICE; + } + } else { + ur_native_handle_t hNativeHandle = + reinterpret_cast(CLDevice); + UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( + hNativeHandle, nullptr, nullptr, &Device)); + } + auto URQueue = + std::make_unique(NativeQueue, Context, Device); + Queue = URQueue.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -92,7 +127,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( if (Version < oclv::V2_0) { cl_command_queue Queue = - clCreateCommandQueue(hContext->get(), hDevice->get(), + clCreateCommandQueue(hContext->CLContext, hDevice->CLDevice, CLProperties & SupportByOpenCL, &RetErr); CL_RETURN_ON_FAILURE(RetErr); try { @@ -112,7 +147,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( cl_queue_properties CreationFlagProperties[] = { CL_QUEUE_PROPERTIES, CLProperties & SupportByOpenCL, 0}; cl_command_queue Queue = clCreateCommandQueueWithProperties( - hContext->get(), hDevice->get(), CreationFlagProperties, &RetErr); + hContext->CLContext, hDevice->CLDevice, CreationFlagProperties, &RetErr); CL_RETURN_ON_FAILURE(RetErr); try { auto URQueue = @@ -149,7 +184,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, case UR_QUEUE_INFO_FLAGS: { cl_command_queue_properties QueueProperties = 0; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - hQueue->get(), CLCommandQueueInfo, sizeof(QueueProperties), + hQueue->CLQueue, CLCommandQueueInfo, sizeof(QueueProperties), &QueueProperties, nullptr)); return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); @@ -159,7 +194,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, } default: { size_t CheckPropSize = 0; - cl_int RetErr = clGetCommandQueueInfo(hQueue->get(), CLCommandQueueInfo, + cl_int RetErr = clGetCommandQueueInfo(hQueue->CLQueue, CLCommandQueueInfo, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; @@ -177,7 +212,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *, ur_native_handle_t *phNativeQueue) { - return getNativeHandle(hQueue->get(), phNativeQueue); + return getNativeHandle(hQueue->CLQueue, phNativeQueue); } UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( @@ -198,13 +233,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(hQueue->get()); + cl_int RetErr = clFinish(hQueue->CLQueue); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { - cl_int RetErr = clFinish(hQueue->get()); + cl_int RetErr = clFinish(hQueue->CLQueue); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index b3c05346cd..7b2924bfdd 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -17,7 +17,7 @@ struct ur_queue_handle_t_ { using native_type = cl_command_queue; - native_type Queue; + native_type CLQueue; ur_context_handle_t Context; ur_device_handle_t Device; std::atomic RefCount = 0; @@ -25,7 +25,7 @@ struct ur_queue_handle_t_ { ur_queue_handle_t_(native_type Queue, ur_context_handle_t Ctx, ur_device_handle_t Dev) - : Queue(Queue), Context(Ctx), Device(Dev) { + : CLQueue(Queue), Context(Ctx), Device(Dev) { RefCount = 1; if (Device) { urDeviceRetain(Device); @@ -36,38 +36,7 @@ struct ur_queue_handle_t_ { static ur_result_t makeWithNative(native_type NativeQueue, ur_context_handle_t Context, ur_device_handle_t Device, - ur_queue_handle_t &Queue) { - try { - cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, - sizeof(CLContext), &CLContext, - nullptr)); - cl_device_id CLDevice; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - NativeQueue, CL_QUEUE_DEVICE, sizeof(CLDevice), &CLDevice, nullptr)); - if (Context->get() != CLContext) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } - if (Device) { - if (Device->get() != CLDevice) { - return UR_RESULT_ERROR_INVALID_DEVICE; - } - } else { - ur_native_handle_t hNativeHandle = - reinterpret_cast(CLDevice); - UR_RETURN_ON_FAILURE(urDeviceCreateWithNativeHandle( - hNativeHandle, nullptr, nullptr, &Device)); - } - auto URQueue = - std::make_unique(NativeQueue, Context, Device); - Queue = URQueue.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; - } + ur_queue_handle_t &Queue); ~ur_queue_handle_t_() { if (Device) { @@ -75,7 +44,7 @@ struct ur_queue_handle_t_ { } urContextRelease(Context); if (IsNativeHandleOwned) { - clReleaseCommandQueue(Queue); + clReleaseCommandQueue(CLQueue); } } @@ -84,6 +53,4 @@ struct ur_queue_handle_t_ { uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } - - native_type get() { return Queue; } }; diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 7a8ebeba2f..1cd2532000 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -146,7 +146,7 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, try { // Always call OpenCL 1.0 API cl_sampler Sampler = clCreateSampler( - hContext->get(), static_cast(pDesc->normalizedCoords), + hContext->CLContext, static_cast(pDesc->normalizedCoords), AddressingMode, FilterMode, &ErrorCode); CL_RETURN_ON_FAILURE(ErrorCode); auto URSampler = std::make_unique(Sampler, hContext); @@ -182,7 +182,7 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, // between them. case UR_SAMPLER_INFO_NORMALIZED_COORDS: { cl_bool normalized_coords = false; - Err = mapCLErrorToUR(clGetSamplerInfo(hSampler->get(), SamplerInfo, + Err = mapCLErrorToUR(clGetSamplerInfo(hSampler->CLSampler, SamplerInfo, sizeof(cl_bool), &normalized_coords, nullptr)); if (pPropValue && propSize != sizeof(ur_bool_t)) { @@ -200,8 +200,9 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, } default: { size_t CheckPropSize = 0; - ur_result_t Err = mapCLErrorToUR(clGetSamplerInfo( - hSampler->get(), SamplerInfo, propSize, pPropValue, &CheckPropSize)); + ur_result_t Err = + mapCLErrorToUR(clGetSamplerInfo(hSampler->CLSampler, SamplerInfo, + propSize, pPropValue, &CheckPropSize)); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -234,7 +235,7 @@ urSamplerRelease(ur_sampler_handle_t hSampler) { UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { - *phNativeSampler = reinterpret_cast(hSampler->get()); + *phNativeSampler = reinterpret_cast(hSampler->CLSampler); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.hpp b/source/adapters/opencl/sampler.hpp index 49f8ec43eb..68839dbd7e 100644 --- a/source/adapters/opencl/sampler.hpp +++ b/source/adapters/opencl/sampler.hpp @@ -15,13 +15,13 @@ struct ur_sampler_handle_t_ { using native_type = cl_sampler; - native_type Sampler; + native_type CLSampler; ur_context_handle_t Context; std::atomic RefCount = 0; bool IsNativeHandleOwned = false; ur_sampler_handle_t_(native_type Sampler, ur_context_handle_t Ctx) - : Sampler(Sampler), Context(Ctx) { + : CLSampler(Sampler), Context(Ctx) { RefCount = 1; urContextRetain(Context); } @@ -29,7 +29,7 @@ struct ur_sampler_handle_t_ { ~ur_sampler_handle_t_() { urContextRelease(Context); if (IsNativeHandleOwned) { - clReleaseSampler(Sampler); + clReleaseSampler(CLSampler); } } @@ -38,6 +38,4 @@ struct ur_sampler_handle_t_ { uint32_t decrementReferenceCount() noexcept { return --RefCount; } uint32_t getReferenceCount() const noexcept { return RefCount; } - - native_type get() { return Sampler; } }; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 0c7a58443c..2b254d9268 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -111,7 +111,7 @@ urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = Context->get(); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &FuncPtr)) { @@ -159,7 +159,7 @@ urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, // First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = Context->get(); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &FuncPtr)) { @@ -168,7 +168,7 @@ urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, hDevice->get(), + Ptr = FuncPtr(CLContext, hDevice->CLDevice, AllocProperties.empty() ? nullptr : AllocProperties.data(), size, Alignment, &ClResult); if (ClResult == CL_INVALID_BUFFER_SIZE) { @@ -207,7 +207,7 @@ urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; - cl_context CLContext = Context->get(); + cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &FuncPtr)) { @@ -216,7 +216,7 @@ urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, if (FuncPtr) { cl_int ClResult = CL_SUCCESS; - Ptr = FuncPtr(CLContext, hDevice->get(), + Ptr = FuncPtr(CLContext, hDevice->CLDevice, AllocProperties.empty() ? nullptr : AllocProperties.data(), size, Alignment, static_cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { @@ -240,7 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, // might be still running. clMemBlockingFreeINTEL_fn FuncPtr = nullptr; - cl_context CLContext = Context->get(); + cl_context CLContext = Context->CLContext; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, @@ -258,7 +258,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext = hQueue->Context->get(); + cl_context CLContext = hQueue->Context->CLContext; if (patternSize <= 128 && isPowerOf2(patternSize)) { clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; @@ -269,9 +269,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - CL_RETURN_ON_FAILURE(EnqueueMemFill(hQueue->get(), ptr, pPattern, + CL_RETURN_ON_FAILURE(EnqueueMemFill(hQueue->CLQueue, ptr, pPattern, patternSize, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { @@ -320,9 +320,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( cl_event CopyEvent = nullptr; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->get(), false, ptr, HostBuffer, size, + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->CLQueue, false, ptr, HostBuffer, size, numEventsInWaitList, CLWaitEvents.data(), &CopyEvent)); @@ -383,7 +383,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Have to look up the context from the kernel - cl_context CLContext = hQueue->Context->get(); + cl_context CLContext = hQueue->Context->CLContext; clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( @@ -394,9 +394,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - RetVal = mapCLErrorToUR(FuncPtr(hQueue->get(), blocking, pDst, pSrc, size, + RetVal = mapCLErrorToUR(FuncPtr(hQueue->CLQueue, blocking, pDst, pSrc, size, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { @@ -424,10 +424,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( - hQueue->get(), numEventsInWaitList, CLWaitEvents.data(), &Event)); + hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { try { auto UREvent = @@ -454,7 +454,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( RetVal = Err; } else { RetVal = map_cl_error_to_ur( - FuncPtr(hQueue->get(), pMem, size, flags, + FuncPtr(hQueue->CLQueue, pMem, size, flags, numEventsInWaitList, reinterpret_cast(phEventWaitList), reinterpret_cast(phEvent))); @@ -468,7 +468,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( ur_event_handle_t *phEvent) { cl_event Event; CL_RETURN_ON_FAILURE( - clEnqueueMarkerWithWaitList(hQueue->get(), 0, nullptr, &Event)); + clEnqueueMarkerWithWaitList(hQueue->CLQueue, 0, nullptr, &Event)); if (phEvent) { try { auto UREvent = @@ -496,7 +496,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( RetVal = Err; } else { RetVal = - map_cl_error_to_ur(FuncPtr(hQueue->get(), pMem, + map_cl_error_to_ur(FuncPtr(hQueue->CLQueue, pMem, size, advice, 0, nullptr, reinterpret_cast(phEvent))); } */ @@ -518,7 +518,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const void *pSrc, size_t srcPitch, size_t width, size_t height, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_context CLContext = hQueue->Context->get(); + cl_context CLContext = hQueue->Context->CLContext; clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( @@ -534,10 +534,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( cl_event Event = nullptr; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { - CLWaitEvents[i] = phEventWaitList[i]->get(); + CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } auto ClResult = - FuncPtr(hQueue->get(), false, + FuncPtr(hQueue->CLQueue, false, static_cast(pDst) + dstPitch * HeightIndex, static_cast(pSrc) + srcPitch * HeightIndex, width, numEventsInWaitList, CLWaitEvents.data(), &Event); @@ -555,7 +555,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } if (phEvent && ClResult == CL_SUCCESS) { cl_event Event; - ClResult = clEnqueueBarrierWithWaitList(hQueue->get(), Events.size(), + ClResult = clEnqueueBarrierWithWaitList(hQueue->CLQueue, Events.size(), Events.data(), &Event); if (phEvent) { try { @@ -596,7 +596,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( size_t propSize, void *pPropValue, size_t *pPropSizeRet) { clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; - cl_context CLContext = Context->get(); + cl_context CLContext = Context->CLContext; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); @@ -623,7 +623,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return ReturnValue(Context->Devices[0]); } size_t CheckPropSize = 0; - cl_int ClErr = GetMemAllocInfo(Context->get(), pMem, PropNameCL, propSize, + cl_int ClErr = GetMemAllocInfo(Context->CLContext, pMem, PropNameCL, propSize, pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE;