From e8db1b5cc4356f3a0aae7e3647261e4cf2dc11eb Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 16 Aug 2023 19:28:02 -0400 Subject: [PATCH 1/5] ADDED: MPI_Comm_internal_vgroup, creating a virtual group without logging. FIXED: Cleanup at end of registerLocalSendsAndRecvs --- .../mpi-wrappers/mpi_group_wrappers.cpp | 17 +++++++++++++++++ mpi-proxy-split/p2p_drain_send_recv.cpp | 7 ++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index e6e403191..7880e834a 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -50,6 +50,23 @@ USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group) return retval; } +int +MPI_Comm_internal_vgroup(MPI_Comm comm, MPI_Group *group) +{ + int retval; + DMTCP_PLUGIN_DISABLE_CKPT(); + MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + retval = NEXT_FUNC(Comm_group)(realComm, group); + RETURN_TO_UPPER_HALF(); + if (retval == MPI_SUCCESS) { + MPI_Group virtGroup = ADD_NEW_GROUP(*group); + *group = virtGroup; + } + DMTCP_PLUGIN_ENABLE_CKPT(); + return retval; +} + USER_DEFINED_WRAPPER(int, Group_size, (MPI_Group) group, (int *) size) { int retval; diff --git a/mpi-proxy-split/p2p_drain_send_recv.cpp b/mpi-proxy-split/p2p_drain_send_recv.cpp index c8bc46b27..68f855d5b 100644 --- a/mpi-proxy-split/p2p_drain_send_recv.cpp +++ b/mpi-proxy-split/p2p_drain_send_recv.cpp @@ -75,7 +75,7 @@ registerLocalSendsAndRecvs() // Get a copy of MPI_COMM_WORLD MPI_Group group_world; MPI_Comm mana_comm; - MPI_Comm_group(MPI_COMM_WORLD, &group_world); + MPI_Comm_internal_vgroup(MPI_COMM_WORLD, &group_world); MPI_Comm_create_group_internal(MPI_COMM_WORLD, group_world, 1, &mana_comm); // broadcast sendBytes and recvBytes @@ -84,7 +84,12 @@ registerLocalSendsAndRecvs() g_bytesSentToUsByRank[g_world_rank] = 0; // Free resources + // Semantics -- although mana_comm IS a real id, and MPI_Comm_free_internal does look up a virtual id to remove, since it cannot find the virtual id, the real id is returned back and freed in the lh. So there is no leak with mana_comm. MPI_Comm_free_internal(&mana_comm); + + // Because group_world is a virtual group, we have to free both its virtual and real id to clean up correctly. + MPI_Group_free_internal(&group_world); + REMOVE_OLD_GROUP(group_world); } // status was received by MPI_Iprobe From a88dc36436cfb77ea47e855e8740023fc63cdc41 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 16 Aug 2023 19:33:23 -0400 Subject: [PATCH 2/5] extern function declaration --- mpi-proxy-split/p2p_drain_send_recv.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/mpi-proxy-split/p2p_drain_send_recv.cpp b/mpi-proxy-split/p2p_drain_send_recv.cpp index 68f855d5b..709d75a50 100644 --- a/mpi-proxy-split/p2p_drain_send_recv.cpp +++ b/mpi-proxy-split/p2p_drain_send_recv.cpp @@ -45,6 +45,7 @@ extern int MPI_Comm_create_group_internal(MPI_Comm comm, MPI_Group group, extern int MPI_Comm_free_internal(MPI_Comm *comm); extern int MPI_Comm_group_internal(MPI_Comm comm, MPI_Group *group); extern int MPI_Group_free_internal(MPI_Group *group); +extern int MPI_Comm_internal_vgroup(MPI_Comm comm, MPI_Group *group); int *g_sendBytesByRank; // Number of bytes sent to other ranks int *g_rsendBytesByRank; // Number of bytes sent to other ranks by MPI_rsend int *g_bytesSentToUsByRank; // Number of bytes other ranks sent to us From f6e5001201caeaea0d581c2acbf32066a6f531e4 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 17 Aug 2023 13:02:43 -0400 Subject: [PATCH 3/5] Improve readability and commentary --- mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp | 4 +++- mpi-proxy-split/p2p_drain_send_recv.cpp | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index 7880e834a..2ceb0fcca 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -50,8 +50,10 @@ USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group) return retval; } +// Calls MPI_Comm_group to define a new group for internal purposes. +// See: p2p_drain_send_recv.cpp int -MPI_Comm_internal_vgroup(MPI_Comm comm, MPI_Group *group) +MPI_Comm_internal_virt_group(MPI_Comm comm, MPI_Group *group) { int retval; DMTCP_PLUGIN_DISABLE_CKPT(); diff --git a/mpi-proxy-split/p2p_drain_send_recv.cpp b/mpi-proxy-split/p2p_drain_send_recv.cpp index 709d75a50..84a29b3bd 100644 --- a/mpi-proxy-split/p2p_drain_send_recv.cpp +++ b/mpi-proxy-split/p2p_drain_send_recv.cpp @@ -45,7 +45,7 @@ extern int MPI_Comm_create_group_internal(MPI_Comm comm, MPI_Group group, extern int MPI_Comm_free_internal(MPI_Comm *comm); extern int MPI_Comm_group_internal(MPI_Comm comm, MPI_Group *group); extern int MPI_Group_free_internal(MPI_Group *group); -extern int MPI_Comm_internal_vgroup(MPI_Comm comm, MPI_Group *group); +extern int MPI_Comm_internal_virt_group(MPI_Comm comm, MPI_Group *group); int *g_sendBytesByRank; // Number of bytes sent to other ranks int *g_rsendBytesByRank; // Number of bytes sent to other ranks by MPI_rsend int *g_bytesSentToUsByRank; // Number of bytes other ranks sent to us @@ -76,7 +76,7 @@ registerLocalSendsAndRecvs() // Get a copy of MPI_COMM_WORLD MPI_Group group_world; MPI_Comm mana_comm; - MPI_Comm_internal_vgroup(MPI_COMM_WORLD, &group_world); + MPI_Comm_internal_virt_group(MPI_COMM_WORLD, &group_world); MPI_Comm_create_group_internal(MPI_COMM_WORLD, group_world, 1, &mana_comm); // broadcast sendBytes and recvBytes @@ -85,10 +85,13 @@ registerLocalSendsAndRecvs() g_bytesSentToUsByRank[g_world_rank] = 0; // Free resources - // Semantics -- although mana_comm IS a real id, and MPI_Comm_free_internal does look up a virtual id to remove, since it cannot find the virtual id, the real id is returned back and freed in the lh. So there is no leak with mana_comm. + // mana_comm is a real id, and MPI_Comm_free_internal expects a + // virtual id, but it works out because virtualToReal(real_id) is + // defined to be real_id. MPI_Comm_free_internal(&mana_comm); - // Because group_world is a virtual group, we have to free both its virtual and real id to clean up correctly. + // Because group_world is a virtual group, we have to free both its + // virtual and real id to clean up correctly. MPI_Group_free_internal(&group_world); REMOVE_OLD_GROUP(group_world); } From d3f11336798b79ac2820fff97762e19f63abc3ed Mon Sep 17 00:00:00 2001 From: Kapil Arya Date: Mon, 28 Aug 2023 11:25:43 -0700 Subject: [PATCH 4/5] Don't skip munmap of mtcp_restart regions. --- restart_plugin/mtcp_restart_plugin.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 4eb309cb8..a583f9127 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -849,6 +849,11 @@ mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo) LhCoreRegions_t *lh_regions_list = NULL; int total_lh_regions = lh_info->numCoreRegions; + // Don't skip munmap of mtcp_restart regions. + if (mtcp_strendswith(area->name, "/mtcp_restart")) { + return 0; + } + if (regionContains(rinfo->pluginInfo.memRange.start, rinfo->pluginInfo.memRange.end, area->addr, area->endAddr)) { From 2c3aaa35544d87096cee6aa56b9e2284cc71f5de Mon Sep 17 00:00:00 2001 From: Kapil Arya Date: Mon, 28 Aug 2023 12:06:49 -0700 Subject: [PATCH 5/5] Munmap heap on restart. --- restart_plugin/mtcp_restart_plugin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index a583f9127..7701b6072 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -850,7 +850,8 @@ mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo) int total_lh_regions = lh_info->numCoreRegions; // Don't skip munmap of mtcp_restart regions. - if (mtcp_strendswith(area->name, "/mtcp_restart")) { + if (mtcp_strendswith(area->name, "/mtcp_restart") || + mtcp_strendswith(area->name, "[heap]")) { return 0; }