Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bin/mana_launch
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ while [ -n "$1" ]; do
options="$options $1 $2"
shift
else
# Recognize the quiet flag in either its long (--quiet) or short (-q) form.
# Both tests must look at "$1" (the argument currently being parsed);
# "$!" is the PID of the most recent background job and can never equal "-q",
# so the short form previously failed to set MANA_QUIET.
if [ "$1" == --quiet ] || [ "$1" == "-q" ]; then
  export MANA_QUIET=1
  # And --quiet is also a flag for DMTCP. Continue from here.
fi
# other flags, options, and target_app executable
options="$options $1"
# The last word will be the target_app.
Expand Down
39 changes: 30 additions & 9 deletions mpi-proxy-split/mpi_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,23 @@
* <http://www.gnu.org/licenses/>. *
****************************************************************************/

#ifdef SINGLE_CART_REORDER
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/stat.h>
#include <sys/types.h>
#ifdef SINGLE_CART_REORDER
#include "cartesian.h"
#endif

#include <cxxabi.h> /* For backtrace() */
#include <execinfo.h> /* For backtrace() */
#include <fcntl.h>

#include <signal.h>
#include <sys/personality.h>
#include <sys/mman.h>

#include <regex>

Expand Down Expand Up @@ -458,7 +458,8 @@ dmtcp_skip_truncate_file_at_restart(const char* path)

MPI_Comm_rank(MPI_COMM_WORLD, &rank);
snprintf(p2p_log_name, sizeof(p2p_log_name) - 1, P2P_LOG_MSG, rank);
snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1, P2P_LOG_REQUEST, rank);
snprintf(p2p_log_request_name, sizeof(p2p_log_request_name)-1,
P2P_LOG_REQUEST, rank);

if (strstr(path, p2p_log_name) ||
strstr(path, p2p_log_request_name)) {
Expand Down Expand Up @@ -852,6 +853,26 @@ computeUnionOfCkptImageAddresses()
dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str());
dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str());
dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str());

if (!getenv("MANA_QUIET")) {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (rank == 0) {
char time_string[30];
time_t t = time(NULL);
strftime(time_string, sizeof(time_string), "%H:%M:%S", localtime(&t));
fprintf(stderr,
"%s: *** MANA: Checkpointing; MANA info saved for restart:\n"
"%*c (Set env var MANA_QUIET to silence this.)\n",
time_string, (int)strlen(time_string), ' ');
fprintf(stderr, " %s: %s\n",
"MANA_MinLibsStart", minLibsStartStr.c_str());
fprintf(stderr, " %s: %s\n",
"MANA_MaxLibsEnd", maxLibsEndStr.c_str());
fprintf(stderr, "%s: %s\n",
" MANA_MinHighMemStart", minHighMemStartStr.c_str());
}
}
}

const char *
Expand Down
32 changes: 32 additions & 0 deletions restart_plugin/mtcp_restart_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,15 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
void
mtcp_plugin_hook(RestoreInfo *rinfo)
{
// FIXME: DMTCP should remove text/data/heap of mtcp_restart.
// For now, MANA has this workaround, in conjunction with
// mpi-proxy-split/mpi_plugin.cpp:computeUnionOfCkptImageAddresses
// When mtcp_restart starts at main, there is already a heap.
// In case anyone else calls sbrk(), this will create a gap after
// the text/data/heap of mtcp_restart. So, computeUnionOfCkptImageAddresses
// will munmap the text/data/heap, but nothing more.
mtcp_sys_brk((char *)0x11200000 + 0x30000);

remap_vdso_and_vvar_regions(rinfo);
mysetauxval(rinfo->environ, AT_SYSINFO_EHDR,
(unsigned long int) rinfo->currentVdsoStart);
Expand Down Expand Up @@ -845,6 +854,29 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
int
mtcp_plugin_skip_memory_region_munmap(Area *area, RestoreInfo *rinfo)
{
// FIXME: All of this is a temporary workaround, until the DMTCP restart
// plugin can be re-designed. See the conversation in PR #357.
// After the DMTCP re-design, we should delete all of the code
// of this paragraph.
// NOTE: 0x11200000 is the address for mtcp_restart.
// See LINKER_FLAGS= -Wl,-Ttext-segment=11200000
// in dmtcp/src/mtcp/Makefile, for why this hard-wired address exists.
// NOTE: This is the originally loaded mtcp_restart (text/data/heap),
// before we copied it to the DMTCP "hole" and execute from there.
if (is_overlap(area->addr, area->endAddr,
(char *)0x11200000, (char *)0x11200000 + 0x30000)) {
// Range [0x11200000, nextPageAddr) should cover mtcp_restart text/data/heap
void *nextPageAddr = (char *)0x11200000 + 0x30000;
void *testIfEmpty = mtcp_sys_mmap(nextPageAddr, 4096,
PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
// MANA panic: The next page after the assumed mtcp_restart memory regions
// was occupied. Is mtcp_restart larger than expected?
MTCP_ASSERT(testIfEmpty == nextPageAddr);
mtcp_sys_munmap(nextPageAddr, 4096); // The test passed. Free it again.
mtcp_sys_munmap((void *)0x11200000, 0x30000); // Unmap the old mtcp_restart.
return 0;
}

LowerHalfInfo_t *lh_info = &rinfo->pluginInfo;
LhCoreRegions_t *lh_regions_list = NULL;
int total_lh_regions = lh_info->numCoreRegions;
Expand Down