diff --git a/debian/changelog b/debian/changelog index 3360041a..23ac5120 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mercury (2.4.1~rc5-1) unstable; urgency=medium + [ Jerome Soumagne ] + * Update to 2.4.1rc5 + + -- Jerome Soumagne Mon, 27 Oct 2025 16:00:00 -0600 + mercury (2.4.0-5) unstable; urgency=medium [ Joseph Moore ] * Update release number to differentiate from test RPMs for prior issue. diff --git a/mercury.spec b/mercury.spec index d26a792d..74187daa 100644 --- a/mercury.spec +++ b/mercury.spec @@ -1,7 +1,9 @@ Name: mercury -Version: 2.4.0 -Release: 5%{?dist} +Version: 2.4.1~rc5 +Release: 1%{?dist} +# --without libfabric build switch +%bcond_without libfabric # --without ucx build switch %bcond_without ucx @@ -15,13 +17,13 @@ License: BSD Group: Development/Libraries URL: http://mercury-hpc.github.io/ Source0: https://github.com/mercury-hpc/%{name}/releases/download/v%{dl_version}/%{name}-%{dl_version}.tar.bz2 -Patch0: na_ucx.patch -Patch1: na_ucx_ep_flush.patch -BuildRequires: libfabric-devel >= 1.20 BuildRequires: cmake BuildRequires: boost-devel BuildRequires: gcc-c++ +%if %{with libfabric} +BuildRequires: libfabric-devel >= 1.20 +%endif %if 0%{?suse_version} %if %{with ucx} BuildRequires: libucp-devel, libucs-devel, libuct-devel @@ -55,6 +57,16 @@ Requires: %{name}%{?_isa} = %{version}-%{release} Mercury development headers and libraries. +%if %{with libfabric} +%package libfabric +Summary: Mercury with libfabric +Requires: %{name}%{?_isa} = %{version}-%{release} + +%description libfabric +Mercury plugin to support the libfabric transport. +%endif + + %if %{with ucx} %package ucx Summary: Mercury with UCX @@ -96,7 +108,9 @@ Mercury plugin to support the UCX transport. %if %{with ucx} -DNA_USE_UCX:BOOL=ON \ %endif +%if %{with libfabric} -DNA_USE_OFI:BOOL=ON +%endif %cmake_build %install @@ -114,7 +128,11 @@ Mercury plugin to support the UCX transport. %{_bindir}/hg_* %{_bindir}/na_* %{_libdir}/*.so.* + +%if %{with libfabric} +%files libfabric %{_libdir}/mercury/libna_plugin_ofi.so +%endif %if %{with ucx} %files ucx @@ -132,8 +150,12 @@ Mercury plugin to support the UCX transport. %{_libdir}/cmake/ %changelog +* Mon Oct 27 2025 Jerome Soumagne - 2.4.1~rc5-1 +- Update to 2.4.1rc5 +- Separate libfabric plugin from main build to align with ucx plugin. + * Wed Jun 25 2025 Joseph Moore - 2.4.0-5 -- Update release number to differentiate from test RPMs for prior issue.. +- Update release number to differentiate from test RPMs for prior issue. * Tue Mar 11 2025 Joseph Moore - 2.4.0-4 - Change to addr_release for handling of "already present" warning. diff --git a/na_ucx.patch b/na_ucx.patch deleted file mode 100644 index 748b7b30..00000000 --- a/na_ucx.patch +++ /dev/null @@ -1,110 +0,0 @@ -diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c -index 84eb8b0..e4b6676 100644 ---- a/src/na/na_ucx.c -+++ b/src/na/na_ucx.c -@@ -614,7 +614,7 @@ na_ucx_addr_map_update(struct na_ucx_class *na_ucx_class, - */ - static na_return_t - na_ucx_addr_map_remove( -- struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key); -+ struct na_ucx_map *na_ucx_map, struct na_ucx_addr *remove_addr); - - /** - * Hash connection ID. -@@ -1688,8 +1688,12 @@ na_ucp_listener_conn_cb(ucp_conn_request_h conn_request, void *arg) - .addr = (const struct sockaddr *) &conn_request_attrs.client_address, - .addrlen = sizeof(conn_request_attrs.client_address)}; - na_ucx_addr = na_ucx_addr_map_lookup(&na_ucx_class->addr_map, &addr_key); -- NA_CHECK_SUBSYS_ERROR_NORET(addr, na_ucx_addr != NULL, error, -- "An entry is already present for this address"); -+ -+ if (na_ucx_addr != NULL) { -+ NA_LOG_SUBSYS_WARNING(addr, -+ "An entry is already present for this address"); -+ na_ucx_addr_release(na_ucx_addr); -+ } - - /* Insert new entry and create new address */ - na_ret = na_ucx_addr_map_insert(na_ucx_class, &na_ucx_class->addr_map, -@@ -1937,10 +1941,14 @@ na_ucp_ep_error_cb( - static void - na_ucp_ep_close(ucp_ep_h ep) - { -- ucs_status_ptr_t status_ptr = ucp_ep_close_nb(ep, UCP_EP_CLOSE_MODE_FORCE); -+ const ucp_request_param_t close_params = { -+ .op_attr_mask = UCP_OP_ATTR_FIELD_FLAGS, -+ .flags = UCP_EP_CLOSE_FLAG_FORCE}; -+ ucs_status_ptr_t status_ptr = ucp_ep_close_nbx(ep, &close_params); -+ - NA_CHECK_SUBSYS_ERROR_DONE(addr, - status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr), -- "ucp_ep_close_nb() failed (%s)", -+ "ucp_ep_close_nbx() failed (%s)", - ucs_status_string(UCS_PTR_STATUS(status_ptr))); - } - -@@ -2722,7 +2730,7 @@ unlock: - - /*---------------------------------------------------------------------------*/ - static na_return_t --na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key) -+na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, struct na_ucx_addr *remove_addr) - { - struct na_ucx_addr *na_ucx_addr = NULL; - na_return_t ret = NA_SUCCESS; -@@ -2731,13 +2739,14 @@ na_ucx_addr_map_remove(struct na_ucx_map *na_ucx_map, ucs_sock_addr_t *addr_key) - hg_thread_rwlock_wrlock(&na_ucx_map->lock); - - na_ucx_addr = hg_hash_table_lookup( -- na_ucx_map->key_map, (hg_hash_table_key_t) addr_key); -- if (na_ucx_addr == HG_HASH_TABLE_NULL) -+ na_ucx_map->key_map, (hg_hash_table_key_t) &remove_addr->addr_key); -+ -+ if (na_ucx_addr == HG_HASH_TABLE_NULL || na_ucx_addr->ucp_ep != remove_addr->ucp_ep) - goto unlock; - - /* Remove addr key from primary map */ - rc = hg_hash_table_remove( -- na_ucx_map->key_map, (hg_hash_table_key_t) addr_key); -+ na_ucx_map->key_map, (hg_hash_table_key_t) &na_ucx_addr->addr_key); - NA_CHECK_SUBSYS_ERROR(addr, rc != 1, unlock, ret, NA_NOENTRY, - "hg_hash_table_remove() failed"); - -@@ -2841,7 +2850,7 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr) - NA_UCX_PRINT_ADDR_KEY_INFO("Removing address", &na_ucx_addr->addr_key); - - na_ucx_addr_map_remove( -- &na_ucx_addr->na_ucx_class->addr_map, &na_ucx_addr->addr_key); -+ &na_ucx_addr->na_ucx_class->addr_map, na_ucx_addr); - } - - if (na_ucx_addr->ucp_ep != NULL) { -@@ -3023,6 +3032,18 @@ na_ucx_rma(struct na_ucx_class NA_UNUSED *na_ucx_class, na_context_t *context, - - /* There is no need to have a fully resolved address to start an RMA. - * This is only necessary for two-sided communication. */ -+ /* The above assumption is now in question, so the following will resolve -+ * the address if required. */ -+ -+ /* Check addr to ensure the EP for that addr is still valid */ -+ if (!(hg_atomic_get32(&na_ucx_addr->status) & NA_UCX_ADDR_RESOLVED)) { -+ ret = na_ucx_addr_map_update( -+ na_ucx_class, &na_ucx_class->addr_map, na_ucx_addr); -+ NA_CHECK_SUBSYS_NA_ERROR( -+ addr, error, ret, "Could not update NA UCX address"); -+ } -+ NA_CHECK_SUBSYS_ERROR(msg, na_ucx_addr->ucp_ep == NULL, error, ret, -+ NA_ADDRNOTAVAIL, "UCP endpoint is NULL for that address"); - - /* TODO UCX requires the remote key to be bound to the origin, do we need a - * new API? */ -@@ -3061,6 +3082,9 @@ na_ucx_rma_key_resolve(ucp_ep_h ep, struct na_ucx_mem_handle *na_ucx_mem_handle, - - hg_thread_mutex_lock(&na_ucx_mem_handle->rkey_unpack_lock); - -+ NA_CHECK_SUBSYS_ERROR( -+ mem, ep == NULL, error, ret, NA_INVALID_ARG, "Invalid endpoint (%p)", ep); -+ - switch (hg_atomic_get32(&na_ucx_mem_handle->type)) { - case NA_UCX_MEM_HANDLE_REMOTE_PACKED: { - ucs_status_t status = ucp_ep_rkey_unpack(ep, diff --git a/na_ucx_ep_flush.patch b/na_ucx_ep_flush.patch deleted file mode 100644 index f7b38d30..00000000 --- a/na_ucx_ep_flush.patch +++ /dev/null @@ -1,64 +0,0 @@ -diff --git a/src/na/na_ucx.c b/src/na/na_ucx.c -index 6e9c3b0..2f157da 100644 ---- a/src/na/na_ucx.c -+++ b/src/na/na_ucx.c -@@ -441,6 +441,12 @@ na_ucp_ep_create(ucp_worker_h worker, ucp_ep_params_t *ep_params, - static void - na_ucp_ep_error_cb(void *arg, ucp_ep_h ep, ucs_status_t status); - -+/** -+ * Flush endpoint. -+ */ -+static ucs_status_ptr_t -+na_ucp_ep_flush(ucp_ep_h ep); -+ - /** - * Close endpoint. - */ -@@ -1940,6 +1946,21 @@ na_ucp_ep_error_cb( - na_ucx_addr_ref_decr(na_ucx_addr); - } - -+/*---------------------------------------------------------------------------*/ -+static ucs_status_ptr_t -+na_ucp_ep_flush(ucp_ep_h ep) -+{ -+ const ucp_request_param_t flush_params = { -+ .op_attr_mask = 0}; -+ ucs_status_ptr_t status_ptr = ucp_ep_flush_nbx(ep, &flush_params); -+ -+ NA_CHECK_SUBSYS_ERROR_DONE(addr, -+ status_ptr != NULL && UCS_PTR_IS_ERR(status_ptr), -+ "ucp_ep_flush_nb() failed (%s)", -+ ucs_status_string(UCS_PTR_STATUS(status_ptr))); -+ return status_ptr; -+} -+ - /*---------------------------------------------------------------------------*/ - static void - na_ucp_ep_close(ucp_ep_h ep) -@@ -2859,8 +2880,23 @@ na_ucx_addr_release(struct na_ucx_addr *na_ucx_addr) - if (na_ucx_addr->ucp_ep != NULL) { - /* NB. for deserialized addresses that are not "connected" addresses, do - * not close the EP */ -- if (na_ucx_addr->worker_addr == NULL) -+ if (na_ucx_addr->worker_addr == NULL) { -+ if (!na_ucx_addr->na_ucx_class->ucp_listener) { -+ ucs_status_ptr_t status_ptr = na_ucp_ep_flush(na_ucx_addr->ucp_ep); -+ -+ if (UCS_PTR_IS_PTR(status_ptr)) { -+ ucs_status_t status; -+ -+ do { -+ ucp_worker_progress(na_ucx_addr->na_ucx_class->ucp_worker); -+ status = ucp_request_check_status(status_ptr); -+ } while (status == UCS_INPROGRESS); -+ ucp_request_free(status_ptr); -+ } -+ } -+ - na_ucp_ep_close(na_ucx_addr->ucp_ep); -+ } - na_ucx_addr->ucp_ep = NULL; - } - diff --git a/packaging/Dockerfile.mockbuild b/packaging/Dockerfile.mockbuild index d8d86ac9..b6cc4655 100644 --- a/packaging/Dockerfile.mockbuild +++ b/packaging/Dockerfile.mockbuild @@ -16,6 +16,7 @@ LABEL maintainer="daos@daos.groups.io" ARG REPO_FILE_URL ARG DAOS_LAB_CA_FILE_URL ARG REPOSITORY_NAME + # script to install OS updates basic tools and daos dependencies # COPY ./utils/scripts/install-fedora.sh /tmp/install.sh # script to setup local repo if available @@ -26,9 +27,9 @@ RUN chmod +x /tmp/repo-helper.sh && \ rm -f /tmp/repo-helper.sh # Install basic tools -RUN dnf -y install mock make \ - rpm-build createrepo rpmlint redhat-lsb-core git \ - python-srpm-macros rpmdevtools && \ +RUN dnf -y install mock make \ + rpm-build createrepo rpmlint git \ + python-srpm-macros rpmdevtools && \ dnf -y clean all # use same UID as host and default value of 1000 if not specified diff --git a/packaging/scripts/repo-helper-fedora.sh b/packaging/scripts/repo-helper-fedora.sh index baa70266..b567e00c 100644 --- a/packaging/scripts/repo-helper-fedora.sh +++ b/packaging/scripts/repo-helper-fedora.sh @@ -9,9 +9,26 @@ set -uex : "${FVERSION:=latest}" : "${REPOSITORY_NAME:=artifactory}" : "${archive:=}" -if [ "$FVERSION" != "latest" ]; then - archive="-archive" -fi + +is_fedora_eol() { + local eol_url fedora_version eol_date today + if [ -n "$REPO_FILE_URL" ]; then + eol_url="${REPO_FILE_URL%repo-files/}eol-proxy/fedora.json" + fedora_version=$(grep VERSION_ID /etc/os-release | cut -d= -f2 | \ + tr -d '"') + eol_date=$(curl -s "$eol_url" | sed 's/},{/}\n{/g' | \ + grep "cycle\":\"$fedora_version\"" | \ + sed -n 's/.*"eol":"\([^"]*\)".*/\1/p') + if [[ -z "$eol_date" ]]; then + return 1 # Assume NOT EOL if data missing + fi + today=$(date +%Y-%m-%d) + [[ "$today" > "$eol_date" ]] + return $? # Return 0 if EOL, 1 if not + else + return 1 # Assume NOT EOL if url is missing + fi +} # shellcheck disable=SC2120 disable_repos () { @@ -58,11 +75,14 @@ install_optional_ca() { if [ -n "$REPO_FILE_URL" ]; then install_curl install_optional_ca + if is_fedora_eol; then + archive="-archive" + fi mkdir -p /etc/yum.repos.d pushd /etc/yum.repos.d/ curl -k --noproxy '*' -sSf \ -o "daos_ci-fedora${archive}-${REPOSITORY_NAME}.repo" \ - "{$REPO_FILE_URL}daos_ci-fedora${archive}-${REPOSITORY_NAME}.repo" + "${REPO_FILE_URL}daos_ci-fedora${archive}-${REPOSITORY_NAME}.repo" disable_repos /etc/yum.repos.d/ popd fi