diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp
index e6e403191..2ceb0fcca 100644
--- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp
+++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp
@@ -50,6 +50,25 @@ USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group)
   return retval;
 }
 
+// Internal MPI_Comm_group: on MPI_SUCCESS, *group is replaced by the id that
+// ADD_NEW_GROUP() returns for the new group. See: p2p_drain_send_recv.cpp
+int
+MPI_Comm_internal_virt_group(MPI_Comm comm, MPI_Group *group)
+{
+  int retval;
+  DMTCP_PLUGIN_DISABLE_CKPT();
+  MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm);
+  JUMP_TO_LOWER_HALF(lh_info.fsaddr);
+  retval = NEXT_FUNC(Comm_group)(realComm, group);
+  RETURN_TO_UPPER_HALF();
+  if (retval == MPI_SUCCESS) {
+    MPI_Group virtGroup = ADD_NEW_GROUP(*group);
+    *group = virtGroup; // caller receives ADD_NEW_GROUP's id
+  }
+  DMTCP_PLUGIN_ENABLE_CKPT();
+  return retval;
+}
+
 USER_DEFINED_WRAPPER(int, Group_size, (MPI_Group) group, (int *) size)
 {
   int retval;
diff --git a/mpi-proxy-split/p2p_drain_send_recv.cpp b/mpi-proxy-split/p2p_drain_send_recv.cpp
index c8bc46b27..84a29b3bd 100644
--- a/mpi-proxy-split/p2p_drain_send_recv.cpp
+++ b/mpi-proxy-split/p2p_drain_send_recv.cpp
@@ -45,6 +45,7 @@ extern int MPI_Comm_create_group_internal(MPI_Comm comm, MPI_Group group,
 extern int MPI_Comm_free_internal(MPI_Comm *comm);
 extern int MPI_Comm_group_internal(MPI_Comm comm, MPI_Group *group);
 extern int MPI_Group_free_internal(MPI_Group *group);
+extern int MPI_Comm_internal_virt_group(MPI_Comm comm, MPI_Group *group);
 int *g_sendBytesByRank; // Number of bytes sent to other ranks
 int *g_rsendBytesByRank; // Number of bytes sent to other ranks by MPI_rsend
 int *g_bytesSentToUsByRank; // Number of bytes other ranks sent to us
@@ -75,7 +76,7 @@ registerLocalSendsAndRecvs()
   // Get a copy of MPI_COMM_WORLD
   MPI_Group group_world;
   MPI_Comm mana_comm;
-  MPI_Comm_group(MPI_COMM_WORLD, &group_world);
+  MPI_Comm_internal_virt_group(MPI_COMM_WORLD, &group_world);
   MPI_Comm_create_group_internal(MPI_COMM_WORLD, group_world, 1, &mana_comm);
 
   // broadcast sendBytes and recvBytes
@@ -84,7 +85,15 @@ registerLocalSendsAndRecvs()
   g_bytesSentToUsByRank[g_world_rank] = 0;
 
   // Free resources
+  // mana_comm is a real id, and MPI_Comm_free_internal expects a
+  // virtual id, but it works out because virtualToReal(real_id) is
+  // defined to be real_id.
   MPI_Comm_free_internal(&mana_comm);
+
+  // group_world is a virtual group (from MPI_Comm_internal_virt_group), so we
+  // have to free both its virtual and real id to clean up correctly.
+  MPI_Group_free_internal(&group_world);
+  REMOVE_OLD_GROUP(group_world);
 }
 
 // status was received by MPI_Iprobe
diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c
index 4eb309cb8..7701b6072 100644
--- a/restart_plugin/mtcp_restart_plugin.c
+++ b/restart_plugin/mtcp_restart_plugin.c
@@ -849,6 +849,12 @@ mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo)
   LhCoreRegions_t *lh_regions_list = NULL;
   int total_lh_regions = lh_info->numCoreRegions;
 
+  // Don't skip munmap of mtcp_restart regions or of the [heap] region.
+  if (mtcp_strendswith(area->name, "/mtcp_restart") ||
+      mtcp_strendswith(area->name, "[heap]")) {
+    return 0;
+  }
+
   if (regionContains(rinfo->pluginInfo.memRange.start,
                      rinfo->pluginInfo.memRange.end,
                      area->addr, area->endAddr)) {