@@ -20,6 +20,60 @@ namespace sycl {
20
20
inline namespace _V1 {
21
21
namespace detail ::enqueue_kernel_launch {
22
22
23
+ void handleOutOfResources (const device_impl &DeviceImpl, pi_kernel Kernel,
24
+ const NDRDescT &NDRDesc) {
25
+ sycl::platform Platform = DeviceImpl.get_platform ();
26
+ sycl::backend Backend = Platform.get_backend ();
27
+ if (Backend == sycl::backend::ext_oneapi_cuda) {
28
+ // PI_ERROR_OUT_OF_RESOURCES is returned when the kernel registers
29
+ // required for the launch config exceeds the maximum number of registers
30
+ // per block (PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP).
31
+ // This is if local_work_size[0] * ... * local_work_size[work_dim - 1]
32
+ // multiplied by PI_KERNEL_GROUP_INFO_NUM_REGS is greater than the value
33
+ // of PI_KERNEL_MAX_NUM_REGISTERS_PER_BLOCK. See Table 15: Technical
34
+ // Specifications per Compute Capability, for limitations.
35
+ const size_t TotalNumberOfWIs =
36
+ NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
37
+
38
+ const uint32_t MaxRegistersPerBlock =
39
+ DeviceImpl.get_info <ext::codeplay::experimental::info::device::
40
+ max_registers_per_work_group>();
41
+
42
+ const PluginPtr &Plugin = DeviceImpl.getPlugin ();
43
+ sycl::detail::pi::PiDevice Device = DeviceImpl.getHandleRef ();
44
+
45
+ uint32_t NumRegisters = 0 ;
46
+ Plugin->call <PiApiKind::piKernelGetGroupInfo>(
47
+ Kernel, Device, PI_KERNEL_GROUP_INFO_NUM_REGS, sizeof (NumRegisters),
48
+ &NumRegisters, nullptr );
49
+
50
+ const bool HasExceededAvailableRegisters =
51
+ TotalNumberOfWIs * NumRegisters > MaxRegistersPerBlock;
52
+
53
+ if (HasExceededAvailableRegisters) {
54
+ std::string message (
55
+ " Exceeded the number of registers available on the hardware.\n " );
56
+ throw sycl::exception (
57
+ sycl::make_error_code (sycl::errc::nd_range),
58
+ // Additional information which can be helpful to the user.
59
+ message.append (
60
+ " \t The number registers per work-group cannot exceed " +
61
+ std::to_string (MaxRegistersPerBlock) +
62
+ " for this kernel on this device.\n "
63
+ " \t The kernel uses " +
64
+ std::to_string (NumRegisters) +
65
+ " registers per work-item for a total of " +
66
+ std::to_string (TotalNumberOfWIs) +
67
+ " work-items per work-group.\n " ));
68
+ }
69
+ }
70
+ // Fallback
71
+ constexpr pi_result Error = PI_ERROR_OUT_OF_RESOURCES;
72
+ throw sycl::exception (sycl::make_error_code (sycl::errc::runtime),
73
+ " PI backend failed. PI backend returns:" +
74
+ codeToString (Error));
75
+ }
76
+
23
77
void handleInvalidWorkGroupSize (const device_impl &DeviceImpl, pi_kernel Kernel,
24
78
const NDRDescT &NDRDesc) {
25
79
sycl::platform Platform = DeviceImpl.get_platform ();
@@ -30,7 +84,6 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
30
84
bool IsOpenCLV1x = false ; // Backend is OpenCL 1.x
31
85
bool IsOpenCLVGE20 = false ; // Backend is Greater or Equal to OpenCL 2.0
32
86
bool IsLevelZero = false ; // Backend is any OneAPI Level 0 version
33
- bool IsCuda = false ; // Backend is CUDA
34
87
auto Backend = Platform.get_backend ();
35
88
if (Backend == sycl::backend::opencl) {
36
89
std::string VersionString =
@@ -41,8 +94,6 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
41
94
(VersionString.find (" 2." ) == 0 ) || (VersionString.find (" 3." ) == 0 );
42
95
} else if (Backend == sycl::backend::ext_oneapi_level_zero) {
43
96
IsLevelZero = true ;
44
- } else if (Backend == sycl::backend::ext_oneapi_cuda) {
45
- IsCuda = true ;
46
97
}
47
98
48
99
const PluginPtr &Plugin = DeviceImpl.getPlugin ();
@@ -243,46 +294,6 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel,
243
294
// else unknown. fallback (below)
244
295
}
245
296
}
246
- } else if (IsCuda) {
247
- // CUDA:
248
- // PI_ERROR_INVALID_WORK_GROUP_SIZE is returned when the kernel registers
249
- // required for the launch config exceeds the maximum number of registers
250
- // per block (PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP).
251
- // This is if local_work_size[0] * ... * local_work_size[work_dim - 1]
252
- // multiplied by PI_KERNEL_GROUP_INFO_NUM_REGS is greater than the value
253
- // of PI_KERNEL_MAX_NUM_REGISTERS_PER_BLOCK. See Table 15: Technical
254
- // Specifications per Compute Capability, for limitations.
255
- const size_t TotalNumberOfWIs =
256
- NDRDesc.LocalSize [0 ] * NDRDesc.LocalSize [1 ] * NDRDesc.LocalSize [2 ];
257
-
258
- uint32_t NumRegisters = 0 ;
259
- Plugin->call <PiApiKind::piKernelGetGroupInfo>(
260
- Kernel, Device, PI_KERNEL_GROUP_INFO_NUM_REGS, sizeof (NumRegisters),
261
- &NumRegisters, nullptr );
262
-
263
- uint32_t MaxRegistersPerBlock =
264
- DeviceImpl.get_info <ext::codeplay::experimental::info::device::
265
- max_registers_per_work_group>();
266
-
267
- const bool HasExceededAvailableRegisters =
268
- TotalNumberOfWIs * NumRegisters > MaxRegistersPerBlock;
269
-
270
- if (HasExceededAvailableRegisters) {
271
- std::string message (
272
- " Exceeded the number of registers available on the hardware.\n " );
273
- throw sycl::nd_range_error (
274
- // Additional information which can be helpful to the user.
275
- message.append (
276
- " \t The number registers per work-group cannot exceed " +
277
- std::to_string (MaxRegistersPerBlock) +
278
- " for this kernel on this device.\n "
279
- " \t The kernel uses " +
280
- std::to_string (NumRegisters) +
281
- " registers per work-item for a total of " +
282
- std::to_string (TotalNumberOfWIs) +
283
- " work-items per work-group.\n " ),
284
- PI_ERROR_INVALID_WORK_GROUP_SIZE);
285
- }
286
297
} else {
287
298
// TODO: Decide what checks (if any) we need for the other backends
288
299
}
@@ -352,6 +363,9 @@ void handleErrorOrWarning(pi_result Error, const device_impl &DeviceImpl,
352
363
assert (Error != PI_SUCCESS &&
353
364
" Success is expected to be handled on caller side" );
354
365
switch (Error) {
366
+ case PI_ERROR_OUT_OF_RESOURCES:
367
+ return handleOutOfResources (DeviceImpl, Kernel, NDRDesc);
368
+
355
369
case PI_ERROR_INVALID_WORK_GROUP_SIZE:
356
370
return handleInvalidWorkGroupSize (DeviceImpl, Kernel, NDRDesc);
357
371
0 commit comments