diff --git a/bin/mana_launch b/bin/mana_launch index 3691a8270..591e21cf6 100755 --- a/bin/mana_launch +++ b/bin/mana_launch @@ -47,6 +47,10 @@ while [ -n "$1" ]; do options="$options $1 $2" shift else + if [ "$1" == --quiet ] || [ "$1" == "-q" ]; then + export MANA_QUIET=1 + # And --quiet is also a flag for DMTCP. Continue from here. + fi # other flags, options, and target_app executable options="$options $1" # The last word will be the target_app. diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index 0141322e8..95e013515 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -19,23 +19,23 @@ * . * ****************************************************************************/ -#ifdef SINGLE_CART_REORDER +#include +#include #include #include -#include #include -#include +#include +#include +#include +#include #include +#include +#ifdef SINGLE_CART_REORDER #include "cartesian.h" #endif #include /* For backtrace() */ #include /* For backtrace() */ -#include - -#include -#include -#include #include @@ -458,7 +458,8 @@ dmtcp_skip_truncate_file_at_restart(const char* path) MPI_Comm_rank(MPI_COMM_WORLD, &rank); snprintf(p2p_log_name, sizeof(p2p_log_name) - 1, P2P_LOG_MSG, rank); - snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1, P2P_LOG_REQUEST, rank); + snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1, + P2P_LOG_REQUEST, rank); if (strstr(path, p2p_log_name) || strstr(path, p2p_log_request_name)) { @@ -852,6 +853,26 @@ computeUnionOfCkptImageAddresses() dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str()); + + if (!getenv("MANA_QUIET")) { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) { + char time_string[30]; + time_t t = time(NULL); + strftime(time_string, sizeof(time_string), "%H:%M:%S", 
localtime(&t)); + fprintf(stderr, + "%s: *** MANA: Checkpointing; MANA info saved for restart:\n" + "%*c (Set env var MANA_QUIET to silence this.)\n", + time_string, (int)strlen(time_string), ' '); + fprintf(stderr, " %s: %s\n", + "MANA_MinLibsStart", minLibsStartStr.c_str()); + fprintf(stderr, " %s: %s\n", + "MANA_MaxLibsEnd", maxLibsEndStr.c_str()); + fprintf(stderr, "%s: %s\n", + " MANA_MinHighMemStart", minHighMemStartStr.c_str()); + } + } } const char * diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 4eb309cb8..4d55bf78a 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -682,6 +682,15 @@ mtcp_plugin_hook(RestoreInfo *rinfo) void mtcp_plugin_hook(RestoreInfo *rinfo) { + // FIXME: DMTCP should remove text/data/heap of mtcp_restart. + // For now, MANA has this workaround, in conjunction with + // mpi-proxy-split/mpi_plugin.cpp:computeUnionOfCkptImageAddresses + // When mtcp_restart starts at main, there is already a heap. + // In case anyone else calls sbrk(), this will create a gap after + // the text/data/heap of mtcp_restart. So, computeUnionOfCkptImageAddresses + // will munmap the text/data/heap, but nothing more. + mtcp_sys_brk((char *)0x11200000 + 0x30000); + remap_vdso_and_vvar_regions(rinfo); mysetauxval(rinfo->environ, AT_SYSINFO_EHDR, (unsigned long int) rinfo->currentVdsoStart); @@ -845,6 +854,29 @@ mtcp_plugin_hook(RestoreInfo *rinfo) int mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo) { + // FIXME: All of this is a temporary workaround, until the DMTCP restart + // plugin can be re-designed. See the conversation in PR #357. + // After the DMTCP re-design, we should delete all of the code + // of this paragraph. + // NOTE: 0x11200000 is the address for mtcp_restart. + // See LINKER_FLAGS= -Wl,-Ttext-segment=11200000 + // in dmtcp/src/mtcp/Makefile, for why this hard-wired address exists. 
+ // NOTE: This is the originally loaded mtcp_restart (text/data/heap), + // before we copied it to the DMTCP "hole" and execute from there. + if (is_overlap(area->addr, area->endAddr, + (char *)0x11200000, (char *)0x11200000 + 0x30000)) { + // Range [0x11200000, nextPageAddr) should cover mtcp_restart text/data/heap + void *nextPageAddr = (char *)0x11200000 + 0x30000; + void *testIfEmpty = mtcp_sys_mmap(nextPageAddr, 4096, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + // MANA panic: The next page after the assumed mtcp_restart memory regions + // was occupied. Is mtcp_restart larger than expected? + MTCP_ASSERT(testIfEmpty == nextPageAddr); + mtcp_sys_munmap(nextPageAddr, 4096); // The test passed. Free it again. + mtcp_sys_munmap((void *)0x11200000, 0x30000); // Unmap the old mtcp_restart. + return 0; + } + LowerHalfInfo_t *lh_info = &rinfo->pluginInfo; LhCoreRegions_t *lh_regions_list = NULL; int total_lh_regions = lh_info->numCoreRegions;