From 97aa1b86cc5dce91574db4c48a586783e55eba3b Mon Sep 17 00:00:00 2001
From: "fujianhao.fjh"
Date: Mon, 5 Jan 2026 16:16:21 +0800
Subject: [PATCH] fix SIGSEGV when release resources

Reset each handle to NULL right after it has been released in
nvshmemt_ibgda_finalize() and nvshmemt_ibrc_finalize(). Every release
is guarded by a non-NULL check on the same field, so a cleared handle
is skipped if the teardown code reaches it again.

The field that is reset must be the one that was tested and released:
for the protection domain and the device context that is
common_device.pd and common_device.context in both transports.

---
 src/modules/transport/ibgda/ibgda.cpp | 2 ++
 src/modules/transport/ibrc/ibrc.cpp   | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/src/modules/transport/ibgda/ibgda.cpp b/src/modules/transport/ibgda/ibgda.cpp
index 115f6b6..0399625 100644
--- a/src/modules/transport/ibgda/ibgda.cpp
+++ b/src/modules/transport/ibgda/ibgda.cpp
@@ -4105,6 +4105,7 @@ int nvshmemt_ibgda_finalize(nvshmem_transport_t transport) {
                 INFO(ibgda_state->log_level, "ibv_dealloc_pd failed for device %d Err: %d:%s.\n", i,
                      errno, strerror(errno));
             }
+            device->common_device.pd = NULL;
         }
 
         if (device->common_device.context) {
@@ -4117,6 +4118,7 @@ int nvshmemt_ibgda_finalize(nvshmem_transport_t transport) {
                 NVSHMEMI_WARN_PRINT("ibv_close_device failed for device %d Err: %d:%s.\n", i,
                                     errno, strerror(errno));
             }
+            device->common_device.context = NULL;
         }
         status = 0;
         // NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
diff --git a/src/modules/transport/ibrc/ibrc.cpp b/src/modules/transport/ibrc/ibrc.cpp
index f7c9ce0..2d8ca08 100644
--- a/src/modules/transport/ibrc/ibrc.cpp
+++ b/src/modules/transport/ibrc/ibrc.cpp
@@ -732,33 +732,39 @@ int nvshmemt_ibrc_finalize(nvshmem_transport_t transport) {
             status = ftable.dereg_mr(((struct ibrc_device *)state->devices)[dev_id].bpool_mr);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_dereg_mr failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].bpool_mr = NULL;
         }
         if (((struct ibrc_device *)state->devices)[dev_id].send_cq) {
             status = ftable.destroy_cq(((struct ibrc_device *)state->devices)[dev_id].send_cq);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_destroy_cq failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].send_cq = NULL;
         }
         if (((struct ibrc_device *)state->devices)[dev_id].recv_cq) {
             status = ftable.destroy_cq(((struct ibrc_device *)state->devices)[dev_id].recv_cq);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_destroy_cq failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].recv_cq = NULL;
         }
         if (((struct ibrc_device *)state->devices)[dev_id].srq) {
             status = ftable.destroy_srq(((struct ibrc_device *)state->devices)[dev_id].srq);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_destroy_srq failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].srq = NULL;
         }
         if (((struct ibrc_device *)state->devices)[dev_id].common_device.pd) {
             status = ftable.dealloc_pd(
                 ((struct ibrc_device *)state->devices)[dev_id].common_device.pd);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_dealloc_pd failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].common_device.pd = NULL;
         }
         if (((struct ibrc_device *)state->devices)[dev_id].common_device.context) {
             status = ftable.close_device(
                 ((struct ibrc_device *)state->devices)[dev_id].common_device.context);
             NVSHMEMI_NZ_ERROR_JMP(status, NVSHMEMX_ERROR_INTERNAL, out,
                                   "ibv_close_device failed \n");
+            ((struct ibrc_device *)state->devices)[dev_id].common_device.context = NULL;
         }
     }
     free(state->devices);