Skip to content
12 changes: 12 additions & 0 deletions src/ucp/api/device/ucp_device_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ UCS_F_DEVICE ucs_status_t ucp_device_prepare_send(

if ((mem_list_h->version != UCP_DEVICE_MEM_LIST_VERSION_V1) ||
(first_mem_elem_index >= mem_list_h->mem_list_length)) {
ucs_device_error("invalid parameters: mem_list version=%u (expected %u), "
"first_mem_elem_index=%u, mem_list_length=%u",
mem_list_h->version, UCP_DEVICE_MEM_LIST_VERSION_V1,
first_mem_elem_index, mem_list_h->mem_list_length);
return UCS_ERR_INVALID_PARAM;
}

Expand Down Expand Up @@ -157,6 +161,8 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_single(
status = ucp_device_prepare_send(mem_list_h, mem_list_index, req, device_ep,
uct_elem, comp);
if (status != UCS_OK) {
ucs_device_error("send prepare failed with %s, mem_list_index=%u",
ucs_device_status_string(status), mem_list_index);
return status;
}

Expand Down Expand Up @@ -214,6 +220,8 @@ UCS_F_DEVICE ucs_status_t ucp_device_counter_inc(
status = ucp_device_prepare_send(mem_list_h, mem_list_index, req, device_ep,
uct_elem, comp);
if (status != UCS_OK) {
ucs_device_error("send prepare failed with %s, mem_list_index=%u",
ucs_device_status_string(status), mem_list_index);
return status;
}

Expand Down Expand Up @@ -276,6 +284,8 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_multi(
status = ucp_device_prepare_send(mem_list_h, 0, req, device_ep,
uct_mem_list, comp);
if (status != UCS_OK) {
ucs_device_error("send prepare failed with %s, mem_list_length=%u",
ucs_device_status_string(status), mem_list_h->mem_list_length);
return status;
}

Expand Down Expand Up @@ -362,6 +372,8 @@ UCS_F_DEVICE ucs_status_t ucp_device_put_multi_partial(
status = ucp_device_prepare_send(mem_list_h, 0, req, device_ep,
uct_mem_list, comp);
if (status != UCS_OK) {
ucs_device_error("send prepare failed with %s, mem_list_count=%u",
ucs_device_status_string(status), mem_list_count);
return status;
}

Expand Down
4 changes: 4 additions & 0 deletions src/ucp/core/ucp_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ ucp_device_mem_list_create(ucp_ep_h ep,
uct_allocated_memory_t mem;

if (!(ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED)) {
ucs_error("ep=%p didn't complete wireup", ep);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to remove that one eventually?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a scoped handler in the test

return UCS_ERR_NOT_CONNECTED;
}

Expand All @@ -427,6 +428,8 @@ ucp_device_mem_list_create(ucp_ep_h ep,
&local_sys_dev, &local_md_map,
&mem_type);
if (status != UCS_OK) {
ucs_error("ep=%p check parameters failed: %s", ep,
ucs_status_string(status));
return status;
}

Expand Down Expand Up @@ -464,6 +467,7 @@ ucp_device_mem_list_create(ucp_ep_h ep,
/* Track memory allocator for later release */
status = ucp_device_mem_handle_hash_insert(&mem);
if (status != UCS_OK) {
ucs_error("failed to insert handle: %s", ucs_status_string(status));
uct_mem_free(&mem);
} else {
*handle_p = mem.address;
Expand Down
50 changes: 45 additions & 5 deletions src/ucs/sys/device_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#define UCS_DEVICE_CODE_H

#include <ucs/sys/compiler_def.h>
#include <ucs/sys/string.h>
#include <ucs/type/status.h>
#include <stdint.h>

/*
Expand Down Expand Up @@ -35,8 +37,7 @@ typedef enum {
} ucs_device_level_t;


static UCS_F_ALWAYS_INLINE const char*
ucs_device_level_name(ucs_device_level_t level)
UCS_F_DEVICE const char *ucs_device_level_name(ucs_device_level_t level)
{
switch (level) {
case UCS_DEVICE_LEVEL_THREAD:
Expand Down Expand Up @@ -86,12 +87,34 @@ UCS_F_DEVICE void ucs_device_atomic64_write(uint64_t *ptr, uint64_t value)
}


/**
 * @brief Device-callable basename
 *
 * Locates the file-name component of @a path using the shared UCS_BASENAME
 * logic. The input string is left untouched; the returned pointer refers to
 * a position inside it.
 *
 * @param [in] path  Path to parse
 *
 * @return Pointer to the file name within @a path
 */
UCS_F_DEVICE const char *ucs_device_basename(const char *path)
{
    const char *file_name = UCS_BASENAME(path);

    return file_name;
}


/*
 * Prefix format for device-side log lines, laid out to match the UCX host log
 * structure. It is written as adjacent string literals (one per field) that
 * the compiler concatenates into a single format string. The caller must pass
 * one argument per conversion, plus an extra int width for the final "%*s".
 */
#define UCS_DEVICE_LOG_FMT \
    "%20s"           /* right-aligned string, 20 columns */ \
    "[%-8d:%-7d] "   /* two left-aligned integers in brackets */ \
    "%17s:%-4u "     /* right-aligned string, then unsigned number */ \
    "%-4s "          /* left-aligned string, 4 columns */ \
    "%-5s "          /* left-aligned string, 5 columns */ \
    "%*s"            /* string whose width is taken from the arguments */


/* Helper macro to print a message from a device function including the
* thread and block indices */
#define ucs_device_printf(_title, _fmt, ...) \
printf("(%d:%d) %6s " _fmt "\n", threadIdx.x, blockIdx.x, _title, \
* thread and block indices, file and line */
#define ucs_device_printf(_level, _fmt, ...) \
printf(UCS_DEVICE_LOG_FMT _fmt "\n", "", threadIdx.x, blockIdx.x, \
ucs_device_basename(__FILE__), __LINE__, "UCX", _level, 0, "", \
##__VA_ARGS__)


/* Print an error message from a device function.
 * Forwards _fmt and its variadic arguments to ucs_device_printf() with the
 * literal "ERROR" as the first argument. */
#define ucs_device_error(_fmt, ...) \
ucs_device_printf("ERROR", _fmt, ##__VA_ARGS__)
Expand All @@ -101,4 +124,21 @@ UCS_F_DEVICE void ucs_device_atomic64_write(uint64_t *ptr, uint64_t value)
#define ucs_device_debug(_fmt, ...) \
ucs_device_printf("DEBUG", _fmt, ##__VA_ARGS__)


/**
* @brief Device compatible status code to string conversion
*
* @param [in] status Status code to convert
*
* @return String representation of the status code
*/
UCS_F_DEVICE const char *ucs_device_status_string(ucs_status_t status)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we use original ucs_status_string both on host and device?
In this case we don't need UCS_STATUS_STRING_CASES macro

__host__ UCS_F_DEVICE const char *ucs_status_string(ucs_status_t status)

{
switch (status) {
UCS_STATUS_STRING_CASES
default:
return "Unknown error";
};
}

#endif
20 changes: 17 additions & 3 deletions src/ucs/sys/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,22 @@ BEGIN_C_DECLS
#define UCS_VALUE_UNKNOWN_STR "unknown"


/**
 * Basename logic shared by host and device code.
 *
 * Evaluates to a pointer to the character following the last '/' in
 * @a _path, or to @a _path itself when it contains no '/'. The result points
 * into the input string; nothing is copied or modified. A path that ends in
 * '/' yields a pointer to its terminating NUL (an empty string).
 *
 * @a _path is evaluated exactly once, so arguments with side effects are
 * safe. It must be a non-NULL, NUL-terminated string. The internal variables
 * carry a _ucs_basename_ prefix so they cannot shadow an identifier used in
 * the argument expression.
 *
 * Implemented as a GNU statement expression.
 */
#define UCS_BASENAME(_path) \
    ({ \
        const char *_ucs_basename_p      = (_path); \
        const char *_ucs_basename_result = _ucs_basename_p; \
        while (*_ucs_basename_p != '\0') { \
            if (*_ucs_basename_p == '/') { \
                _ucs_basename_result = _ucs_basename_p + 1; \
            } \
            _ucs_basename_p++; \
        } \
        _ucs_basename_result; \
    })



/**
* Expand a partial path to full path.
*
Expand Down Expand Up @@ -210,9 +226,7 @@ char *ucs_strtrim(char *str);
*/
static UCS_F_ALWAYS_INLINE const char* ucs_basename(const char *path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do it without macro by declaring function as both host and device?

Suggested change
static UCS_F_ALWAYS_INLINE const char* ucs_basename(const char *path)
__host__ UCS_F_DEVICE const char* ucs_basename(const char *path)

{
const char *name = strrchr(path, '/');

return (name == NULL) ? path : name + 1;
return UCS_BASENAME(path);
}


Expand Down
57 changes: 1 addition & 56 deletions src/ucs/type/status.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,62 +18,7 @@ const char *ucs_status_string(ucs_status_t status)
static char error_str[128] = {0};

switch (status) {
case UCS_OK:
return "Success";
case UCS_INPROGRESS:
return "Operation in progress";
case UCS_ERR_NO_MESSAGE:
return "No pending message";
case UCS_ERR_NO_RESOURCE:
return "No resources are available to initiate the operation";
case UCS_ERR_IO_ERROR:
return "Input/output error";
case UCS_ERR_NO_MEMORY:
return "Out of memory";
case UCS_ERR_INVALID_PARAM:
return "Invalid parameter";
case UCS_ERR_UNREACHABLE:
return "Destination is unreachable";
case UCS_ERR_INVALID_ADDR:
return "Address not valid";
case UCS_ERR_NOT_IMPLEMENTED:
return "Function not implemented";
case UCS_ERR_MESSAGE_TRUNCATED:
return "Message truncated";
case UCS_ERR_NO_PROGRESS:
return "No progress";
case UCS_ERR_BUFFER_TOO_SMALL:
return "Provided buffer is too small";
case UCS_ERR_NO_ELEM:
return "No such element";
case UCS_ERR_SOME_CONNECTS_FAILED:
return "Failed to connect some of the requested endpoints";
case UCS_ERR_NO_DEVICE:
return "No such device";
case UCS_ERR_BUSY:
return "Device is busy";
case UCS_ERR_CANCELED:
return "Request canceled";
case UCS_ERR_SHMEM_SEGMENT:
return "Shared memory error";
case UCS_ERR_ALREADY_EXISTS:
return "Element already exists";
case UCS_ERR_OUT_OF_RANGE:
return "Index out of range";
case UCS_ERR_TIMED_OUT:
return "Operation timed out";
case UCS_ERR_EXCEEDS_LIMIT:
return "User-defined limit was reached";
case UCS_ERR_UNSUPPORTED:
return "Unsupported operation";
case UCS_ERR_REJECTED:
return "Operation rejected by remote peer";
case UCS_ERR_NOT_CONNECTED:
return "Endpoint is not connected";
case UCS_ERR_CONNECTION_RESET:
return "Connection reset by remote peer";
case UCS_ERR_ENDPOINT_TIMEOUT:
return "Endpoint timeout";
UCS_STATUS_STRING_CASES
default:
snprintf(error_str, sizeof(error_str) - 1, "Unknown error %d", status);
return error_str;
Expand Down
101 changes: 61 additions & 40 deletions src/ucs/type/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,50 @@ BEGIN_C_DECLS
* @}
*/

/**
 * @ingroup UCS_RESOURCE
 * @brief Master list (X-macro) of UCS status codes
 *
 * Single table holding, for every status code, its identifier, its numeric
 * value and its human-readable message. Expanding the table with different
 * callbacks lets the enum definition and the status-to-string conversion be
 * generated from the same data instead of being maintained separately.
 *
 * @param _macro  Function-like macro invoked as _macro(ID, VALUE, MSG), once
 *                per status code, in the order listed below. Invocations are
 *                emitted back to back with no separator, so the callback has
 *                to provide its own (for example a trailing comma).
 */
#define UCS_FOREACH_STATUS(_macro) \
    _macro(UCS_OK,                            0, "Success") \
    _macro(UCS_INPROGRESS,                    1, "Operation in progress") \
    _macro(UCS_ERR_NO_MESSAGE,               -1, "No pending message") \
    _macro(UCS_ERR_NO_RESOURCE,              -2, \
           "No resources are available to initiate the operation") \
    _macro(UCS_ERR_IO_ERROR,                 -3, "Input/output error") \
    _macro(UCS_ERR_NO_MEMORY,                -4, "Out of memory") \
    _macro(UCS_ERR_INVALID_PARAM,            -5, "Invalid parameter") \
    _macro(UCS_ERR_UNREACHABLE,              -6, "Destination is unreachable") \
    _macro(UCS_ERR_INVALID_ADDR,             -7, "Address not valid") \
    _macro(UCS_ERR_NOT_IMPLEMENTED,          -8, "Function not implemented") \
    _macro(UCS_ERR_MESSAGE_TRUNCATED,        -9, "Message truncated") \
    _macro(UCS_ERR_NO_PROGRESS,             -10, "No progress") \
    _macro(UCS_ERR_BUFFER_TOO_SMALL,        -11, "Provided buffer is too small") \
    _macro(UCS_ERR_NO_ELEM,                 -12, "No such element") \
    _macro(UCS_ERR_SOME_CONNECTS_FAILED,    -13, \
           "Failed to connect some of the requested endpoints") \
    _macro(UCS_ERR_NO_DEVICE,               -14, "No such device") \
    _macro(UCS_ERR_BUSY,                    -15, "Device is busy") \
    _macro(UCS_ERR_CANCELED,                -16, "Request canceled") \
    _macro(UCS_ERR_SHMEM_SEGMENT,           -17, "Shared memory error") \
    _macro(UCS_ERR_ALREADY_EXISTS,          -18, "Element already exists") \
    _macro(UCS_ERR_OUT_OF_RANGE,            -19, "Index out of range") \
    _macro(UCS_ERR_TIMED_OUT,               -20, "Operation timed out") \
    _macro(UCS_ERR_EXCEEDS_LIMIT,           -21, "User-defined limit was reached") \
    _macro(UCS_ERR_UNSUPPORTED,             -22, "Unsupported operation") \
    _macro(UCS_ERR_REJECTED,                -23, "Operation rejected by remote peer") \
    _macro(UCS_ERR_NOT_CONNECTED,           -24, "Endpoint is not connected") \
    _macro(UCS_ERR_CONNECTION_RESET,        -25, "Connection reset by remote peer") \
    _macro(UCS_ERR_FIRST_LINK_FAILURE,      -40, "First link failure") \
    _macro(UCS_ERR_LAST_LINK_FAILURE,       -59, "Last link failure") \
    _macro(UCS_ERR_FIRST_ENDPOINT_FAILURE,  -60, "First endpoint failure") \
    _macro(UCS_ERR_ENDPOINT_TIMEOUT,        -80, "Endpoint timeout") \
    _macro(UCS_ERR_LAST_ENDPOINT_FAILURE,   -89, "Last endpoint failure") \
    _macro(UCS_ERR_LAST,                   -100, "Last error code")

/**
* @ingroup UCS_RESOURCE
* @brief Status codes
Expand All @@ -42,47 +86,10 @@ BEGIN_C_DECLS
* For example, if a link fails it may be sufficient to destroy (and possibly
* replace) it, in contrast to an endpoint-level error.
*/
/* Callback for UCS_FOREACH_STATUS: emits one enumerator of the form
 * "ID = VALUE,"; the third (message) argument is ignored. */
#define UCS_STATUS_ENUMIFY(ID, VALUE, _) ID = VALUE,

typedef enum {
/* Operation completed successfully */
UCS_OK = 0,

/* Operation is queued and still in progress */
UCS_INPROGRESS = 1,

/* Failure codes */
UCS_ERR_NO_MESSAGE = -1,
UCS_ERR_NO_RESOURCE = -2,
UCS_ERR_IO_ERROR = -3,
UCS_ERR_NO_MEMORY = -4,
UCS_ERR_INVALID_PARAM = -5,
UCS_ERR_UNREACHABLE = -6,
UCS_ERR_INVALID_ADDR = -7,
UCS_ERR_NOT_IMPLEMENTED = -8,
UCS_ERR_MESSAGE_TRUNCATED = -9,
UCS_ERR_NO_PROGRESS = -10,
UCS_ERR_BUFFER_TOO_SMALL = -11,
UCS_ERR_NO_ELEM = -12,
UCS_ERR_SOME_CONNECTS_FAILED = -13,
UCS_ERR_NO_DEVICE = -14,
UCS_ERR_BUSY = -15,
UCS_ERR_CANCELED = -16,
UCS_ERR_SHMEM_SEGMENT = -17,
UCS_ERR_ALREADY_EXISTS = -18,
UCS_ERR_OUT_OF_RANGE = -19,
UCS_ERR_TIMED_OUT = -20,
UCS_ERR_EXCEEDS_LIMIT = -21,
UCS_ERR_UNSUPPORTED = -22,
UCS_ERR_REJECTED = -23,
UCS_ERR_NOT_CONNECTED = -24,
UCS_ERR_CONNECTION_RESET = -25,

UCS_ERR_FIRST_LINK_FAILURE = -40,
UCS_ERR_LAST_LINK_FAILURE = -59,
UCS_ERR_FIRST_ENDPOINT_FAILURE = -60,
UCS_ERR_ENDPOINT_TIMEOUT = -80,
UCS_ERR_LAST_ENDPOINT_FAILURE = -89,

UCS_ERR_LAST = -100
UCS_FOREACH_STATUS(UCS_STATUS_ENUMIFY)
} UCS_S_PACKED ucs_status_t;


Expand Down Expand Up @@ -112,6 +119,20 @@ typedef void *ucs_status_ptr_t;
#define UCS_STATUS_PTR(_status) ((void*)(intptr_t)(_status))
#define UCS_STATUS_IS_ERR(_status) ((_status) < 0)

/**
 * @brief Helper macro to generate switch case for status to string conversion
 *
 * Callback for UCS_FOREACH_STATUS: expands to a `case` label for @a ID whose
 * body returns the message string @a MSG. The second argument (the numeric
 * value) is ignored. The expansion is only valid inside a `switch` statement
 * in a function that can return a string literal.
 */
#define UCS_STATUS_STRINGIFY(ID, _, MSG) \
case ID: \
return MSG;
/**
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing newline

* @brief Common status code to string cases
*
* This macro defines the common switch cases for converting status codes to
* strings. It's used by both the host and device implementations to avoid
* code duplication.
*/
#define UCS_STATUS_STRING_CASES UCS_FOREACH_STATUS(UCS_STATUS_STRINGIFY)

/**
* @param status UCS status code.
Expand Down
Loading
Loading