@@ -60,6 +60,23 @@ UCS_F_DEVICE void ucp_device_request_init(uct_device_ep_t *device_ep,
6060}
6161
6262
/**
 * Post a device put operation, retrying for as long as the put routine
 * reports @ref UCS_ERR_NO_RESOURCE.
 *
 * Each iteration calls @a _uct_device_ep_put<_level>(_device_ep, ...). If the
 * returned status is anything other than UCS_ERR_NO_RESOURCE, the loop stops
 * and that status is the result. Otherwise the endpoint is progressed with
 * uct_device_ep_progress<_level>(_device_ep); if progress reports an error
 * status (per UCS_STATUS_IS_ERR), that error is the result, otherwise the put
 * is attempted again.
 *
 * The macro is a statement expression (GNU extension) evaluating to a
 * ucs_status_t.
 *
 * @note There must be no whitespace between the macro name and the opening
 *       parenthesis of its parameter list, otherwise the preprocessor treats
 *       it as an object-like macro.
 * @note @a _device_ep and the variadic arguments are expanded on every retry
 *       iteration, so they must be free of side effects.
 *
 * @param _level              Cooperation level passed as the template argument.
 * @param _uct_device_ep_put  Name of the put function template to invoke.
 * @param _device_ep          Device endpoint used for both put and progress.
 * @param ...                 Remaining arguments forwarded to the put routine.
 */
#define UCP_DEVICE_PUT_BLOCKING(_level, _uct_device_ep_put, _device_ep, ...) \
    ({ \
        ucs_status_t _status; \
        do { \
            _status = _uct_device_ep_put<_level>((_device_ep), __VA_ARGS__); \
            if (_status != UCS_ERR_NO_RESOURCE) { \
                break; \
            } \
            _status = uct_device_ep_progress<_level>((_device_ep)); \
        } while (!UCS_STATUS_IS_ERR(_status)); \
        _status; \
    })
78+
79+
6380UCS_F_DEVICE ucs_status_t ucp_device_prepare_single (
6481 ucp_device_mem_list_handle_h mem_list_h, unsigned mem_list_index,
6582 ucp_device_request_t *req, uct_device_ep_t *&device_ep,
@@ -115,6 +132,7 @@ ucp_device_prepare_multi(ucp_device_mem_list_handle_h mem_list_h,
115132 *
116133 * The routine returns a request that can be progressed and checked for
117134 * completion with @ref ucp_device_progress_req.
135+ * The routine returns only after the message has been posted or an error has occurred.
118136 *
119137 * This routine can be called repeatedly with the same handle and different
120138 * addresses and length. The flags parameter can be used to modify the behavior
@@ -149,8 +167,9 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_single(
149167 return status;
150168 }
151169
152- return uct_device_ep_put_single<level>(device_ep, uct_elem, address,
153- remote_address, length, flags, comp);
170+ return UCP_DEVICE_PUT_BLOCKING (level, uct_device_ep_put_single, device_ep,
171+ uct_elem, address, remote_address, length,
172+ flags, comp);
154173}
155174
156175
@@ -225,6 +244,7 @@ UCS_F_DEVICE ucs_status_t ucp_device_counter_inc(
225244 *
226245 * The routine returns a request that can be progressed and checked for
227246 * completion with @ref ucp_device_progress_req.
247+ * The routine returns only after all the messages have been posted or an error has occurred.
228248 *
229249 * This routine can be called repeatedly with the same handle and different
230250 * @a addresses, @a lengths and counter related parameters. The @a flags
@@ -261,11 +281,11 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_multi(
261281 return status;
262282 }
263283
264- return uct_device_ep_put_multi<level>(device_ep, uct_mem_list ,
265- mem_list_h->mem_list_length ,
266- addresses, remote_addresses, lengths,
267- counter_inc_value,
268- counter_remote_address, flags, comp);
284+ return UCP_DEVICE_PUT_BLOCKING (level, uct_device_ep_put_multi, device_ep ,
285+ uct_mem_list, mem_list_h->mem_list_length ,
286+ addresses, remote_addresses, lengths,
287+ counter_inc_value, counter_remote_address ,
288+ flags, comp);
269289}
270290
271291
@@ -292,6 +312,7 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_multi(
292312 *
293313 * The routine returns a request that can be progressed and checked for
294314 * completion with @ref ucp_device_progress_req.
315+ * The routine returns only after all the messages have been posted or an error has occurred.
295316 *
296317 * This routine can be called repeatedly with the same handle and different
297318 * mem_list_indices, addresses, lengths and increment related parameters. The
@@ -334,10 +355,11 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_multi_partial(
334355 return status;
335356 }
336357
337- return uct_device_ep_put_multi_partial<level>(
338- device_ep, uct_mem_list, mem_list_indices, mem_list_count,
339- addresses, remote_addresses, lengths, counter_index,
340- counter_inc_value, counter_remote_address, flags, comp);
358+ return UCP_DEVICE_PUT_BLOCKING (level, uct_device_ep_put_multi_partial,
359+ device_ep, uct_mem_list, mem_list_indices,
360+ mem_list_count, addresses, remote_addresses,
361+ lengths, counter_index, counter_inc_value,
362+ counter_remote_address, flags, comp);
341363}
342364
343365
@@ -364,6 +386,28 @@ UCS_F_DEVICE uint64_t ucp_device_counter_read(const void *counter_ptr)
364386}
365387
366388
389+ /* *
390+ * @ingroup UCP_DEVICE
391+ * @brief Write value to the counter memory area.
392+ *
393+ * This function can be used to set counter to a specific value.
394+ *
395+ * The counter memory area must be initialized with the host function
396+ * @ref ucp_device_counter_init.
397+ *
398+ * @tparam level Level of cooperation of the transfer.
399+ * @param [in] counter_ptr Counter memory area.
400+ * @param [in] value Value to write.
401+ *
402+ */
403+ template <ucs_device_level_t level = UCS_DEVICE_LEVEL_THREAD>
404+ UCS_F_DEVICE void ucp_device_counter_write (void *counter_ptr, uint64_t value)
405+ {
406+ return ucs_device_atomic64_write (
407+ reinterpret_cast <uint64_t *>(counter_ptr), value);
408+ }
409+
410+
367411/* *
368412 * @ingroup UCP_DEVICE
369413 * @brief Progress a device request containing a batch of operations.
@@ -390,7 +434,12 @@ UCS_F_DEVICE ucs_status_t ucp_device_progress_req(ucp_device_request_t *req)
390434 }
391435
392436 status = uct_device_ep_progress<level>(req->device_ep );
393- return (status != UCS_OK ? status : UCS_INPROGRESS);
437+ if (status != UCS_OK) {
438+ return status;
439+ }
440+
441+ return (ucs_likely (req->comp .count == 0 )) ? req->comp .status :
442+ UCS_INPROGRESS;
394443}
395444
396445#endif /* UCP_DEVICE_IMPL_H */
0 commit comments