From a0d09249e466d61481653d61c66cd47d36317112 Mon Sep 17 00:00:00 2001 From: Gene Cooperman Date: Sat, 2 Sep 2023 00:39:51 -0700 Subject: [PATCH 1/2] Print minLibsStart, maxLibsEnd, etc., during ckpt * This prints the values to easily show the values being passed from the time of checkpoint to the time of restart. It serves the developers, and it serves the end users by letting them know that a ckpt happened. * The message is printed unless the env var MANA_QUIET is set; 'mana_launch --quiet' (or '-q') sets it. Turning this off could also become a configure option in the future, if there's a demand for it. --- bin/mana_launch | 4 ++++ mpi-proxy-split/mpi_plugin.cpp | 39 ++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/bin/mana_launch b/bin/mana_launch index 3691a8270..591e21cf6 100755 --- a/bin/mana_launch +++ b/bin/mana_launch @@ -47,6 +47,10 @@ while [ -n "$1" ]; do options="$options $1 $2" shift else + if [ "$1" == --quiet ] || [ "$1" == "-q" ]; then + export MANA_QUIET=1 + # And --quiet is also a flag for DMTCP. Continue from here. + fi # other flags, options, and target_app executable options="$options $1" # The last word will be the target_app. diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index 0141322e8..95e013515 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -19,23 +19,23 @@ * . 
* ****************************************************************************/ -#ifdef SINGLE_CART_REORDER +#include +#include #include #include -#include #include -#include +#include +#include +#include +#include #include +#include +#ifdef SINGLE_CART_REORDER #include "cartesian.h" #endif #include /* For backtrace() */ #include /* For backtrace() */ -#include - -#include -#include -#include #include @@ -458,7 +458,8 @@ dmtcp_skip_truncate_file_at_restart(const char* path) MPI_Comm_rank(MPI_COMM_WORLD, &rank); snprintf(p2p_log_name, sizeof(p2p_log_name) - 1, P2P_LOG_MSG, rank); - snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1, P2P_LOG_REQUEST, rank); + snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1, + P2P_LOG_REQUEST, rank); if (strstr(path, p2p_log_name) || strstr(path, p2p_log_request_name)) { @@ -852,6 +853,26 @@ computeUnionOfCkptImageAddresses() dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str()); + + if (!getenv("MANA_QUIET")) { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) { + char time_string[30]; + time_t t = time(NULL); + strftime(time_string, sizeof(time_string), "%H:%M:%S", localtime(&t)); + fprintf(stderr, + "%s: *** MANA: Checkpointing; MANA info saved for restart:\n" + "%*c (Set env var MANA_QUIET to silence this.)\n", + time_string, (int)strlen(time_string), ' '); + fprintf(stderr, " %s: %s\n", + "MANA_MinLibsStart", minLibsStartStr.c_str()); + fprintf(stderr, " %s: %s\n", + "MANA_MaxLibsEnd", maxLibsEndStr.c_str()); + fprintf(stderr, "%s: %s\n", + " MANA_MinHighMemStart", minHighMemStartStr.c_str()); + } + } } const char * From ad6f9041852eebe0854ecbe2b9ced9a0a305c43e Mon Sep 17 00:00:00 2001 From: Gene Cooperman Date: Sat, 2 Sep 2023 00:51:07 -0700 Subject: [PATCH 2/2] A hack-ish fix to DMTCP not unmapping mtcp_restart * On second 
checkpoint (ckpt-restart-ckpt), the checkpoint image keeps a copy of mtcp_restart in its checkpoint image. * On the second restart, we try to restore the mtcp_restart of the ckpt image on top of the current mtcp_restart used for restart. This then segfaults, due to failure of mmap with MAP_FIXED_NOREPLACE. * So now we munmap it within MANA. --- restart_plugin/mtcp_restart_plugin.c | 32 ++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 4eb309cb8..4d55bf78a 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -682,6 +682,15 @@ mtcp_plugin_hook(RestoreInfo *rinfo) void mtcp_plugin_hook(RestoreInfo *rinfo) { + // FIXME: DMTCP should remove text/data/heap of mtcp_restart. + // For now, MANA has this workaround, in conjunction with + // mpi-proxy-split/mpi_plugin.cpp:computeUnionOfCkptImageAddresses + // When mtcp_restart starts at main, there is already a heap. + // In case anyone else calls sbrk(), this will create a gap after + // the text/data/heap of mtcp_restart. So, computeUnionOfCkptImageAddresses + // will munmap the text/data/heap, but nothing more. + mtcp_sys_brk((char *)0x11200000 + 0x30000); + remap_vdso_and_vvar_regions(rinfo); mysetauxval(rinfo->environ, AT_SYSINFO_EHDR, (unsigned long int) rinfo->currentVdsoStart); @@ -845,6 +854,29 @@ mtcp_plugin_hook(RestoreInfo *rinfo) int mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo) { + // FIXME: All of this is a temporary workaround, until the DMTCP restart + // plugin can be re-designed. See the conversation in PR #357. + // After the DMTCP re-design, we should delete all of the code + // of this paragraph. + // NOTE: 0x11200000 is the address for mtcp_restart. + // See LINKER_FLAGS= -Wl,-Ttext-segment=11200000 + // in dmtcp/src/mtcp/Makefile, for why this hard-wired address exists. 
+ // NOTE: This is the originally loaded mtcp_restart (text/data/heap), + // before we copied it to the DMTCP "hole" and execute from there. + if (is_overlap(area->addr, area->endAddr, + (char *)0x11200000, (char *)0x11200000 + 0x30000)) { + // Range [0x11200000, nextPageAddr] should cover mtcp_restart text/data/heap + void *nextPageAddr = (char *)0x11200000 + 0x30000; + void *testIfEmpty = mtcp_sys_mmap(nextPageAddr, 4096, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + // MANA panic: The next page after the assumed mtcp_restart memory regions + // was occupied. Is mtcp_restart larger than expected? + MTCP_ASSERT(testIfEmpty == nextPageAddr); + mtcp_sys_munmap(nextPageAddr, 4096); // The test passed. Free it again. + mtcp_sys_munmap((void *)0x11200000, 0x30000); // Unmap the old mtcp_restart. + return 0; + } + LowerHalfInfo_t *lh_info = &rinfo->pluginInfo; LhCoreRegions_t *lh_regions_list = NULL; int total_lh_regions = lh_info->numCoreRegions;