From a5f91a564bdd2b50d80f4d813784d2e57c31878c Mon Sep 17 00:00:00 2001 From: Gene Cooperman Date: Tue, 22 Aug 2023 04:28:08 -0700 Subject: [PATCH 1/2] Add maxHighMemEnd (end2 in reserveUpperHalfMem) * restart_plugin/mtcp_restart.c:reserveUpperHalfMemoryRegionsForCkptImgs() requires start1, end1, start2, end2. * start2 is roughly the low end of the stack, and end2 is 8 MB higher. * But because of address space randomization, the stacks of different ranks can be 1 GB apart or more. So, choosing [minHighMemStart, minHighMemStart + 8MB] is not sufficient to reserve all possible stack regions. * So, this adds maxHighMemEnd (the maximum of highMemStart+8MB for the highMemStart (stack) in each rank). --- mpi-proxy-split/mpi_plugin.cpp | 20 +++++++++++++++++++- restart_plugin/mtcp_restart_plugin.c | 24 ++++++++++++------------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index cae705c15..2575e0704 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -699,7 +699,9 @@ computeUnionOfCkptImageAddresses() void *libsEnd = NULL; void *maxLibsEnd = NULL; void *highMemStart = 0x0; - void *minHighMemStart = NULL; + void *minHighMemStart = 0x0; + void *highMemEnd = 0x0; // FIXME: For now, always 8 MB above highMemStart + void *maxHighMemEnd = 0x0; void *lhEnd = 0x0; // lower bound for actual lhEnd void *minAddrBeyondHeap = NULL; void *maxAddrBeyondHeap = NULL; @@ -766,10 +768,16 @@ computeUnionOfCkptImageAddresses() if (prev_addr_end != NULL && prev_addr_end == libsEnd) { highMemStart = area.addr; // This should be the start of the stack. + // Allow 8 MB for stack/argv/auxv + highMemEnd = (char *)highMemStart + 8 * 1024*1024; } if (strcmp(area.name, "[stack]") == 0) { highMemStart = area.addr; // This should be the start of the stack. 
+ // Allow 8 MB for stack/argv/auxv + highMemEnd = (char *)highMemStart + 8 * 1024*1024; + JASSERT(area.endAddr < highMemEnd)((void*)area.addr)((void*)area.endAddr) + (highMemEnd); } // FIXME: We are no longer using min/maxAddrBeyondHeap. // Given that heapAddr is poorly defined between launch and restart, @@ -811,11 +819,13 @@ computeUnionOfCkptImageAddresses() string minAddrBeyondHeapStr = jalib::XToString(minAddrBeyondHeap); string maxAddrBeyondHeapStr = jalib::XToString(maxAddrBeyondHeap); string highMemStartStr = jalib::XToString(highMemStart); + string highMemEndStr = jalib::XToString(highMemEnd); kvdb::set(workerPath, "MANA_heapAddr", heapAddrStr); kvdb::set(workerPath, "MANA_libsStart_Orig", origLibsStartStr); kvdb::set(workerPath, "MANA_libsStart", libsStartStr); kvdb::set(workerPath, "MANA_libsEnd", libsEndStr); kvdb::set(workerPath, "MANA_highMemStart", highMemStartStr); + kvdb::set(workerPath, "MANA_highMemEnd", highMemEndStr); kvdb::set(workerPath, "MANA_minAddrBeyondHeap", minAddrBeyondHeapStr); kvdb::set(workerPath, "MANA_maxAddrBeyondHeap", maxAddrBeyondHeapStr); @@ -826,6 +836,8 @@ computeUnionOfCkptImageAddresses() (int64_t)libsEnd) == KVDBResponse::SUCCESS); JASSERT(kvdb::request64(KVDBRequest::MIN, kvdb, "highMemStart", (int64_t)highMemStart) == KVDBResponse::SUCCESS); + JASSERT(kvdb::request64(KVDBRequest::MAX, kvdb, "highMemEnd", + (int64_t)highMemEnd) == KVDBResponse::SUCCESS); dmtcp_global_barrier("MANA_CKPT_UNION"); @@ -835,6 +847,8 @@ computeUnionOfCkptImageAddresses() KVDBResponse::SUCCESS); JASSERT(kvdb::get64(kvdb, "highMemStart", (int64_t *)&minHighMemStart) == KVDBResponse::SUCCESS); + JASSERT(kvdb::get64(kvdb, "highMemEnd", (int64_t *)&maxHighMemEnd) == + KVDBResponse::SUCCESS); ostringstream o; @@ -842,20 +856,24 @@ computeUnionOfCkptImageAddresses() HEXSTR(o, libsStart); HEXSTR(o, libsEnd); HEXSTR(o, highMemStart); + HEXSTR(o, highMemEnd); HEXSTR(o, minLibsStart); HEXSTR(o, maxLibsEnd); HEXSTR(o, minHighMemStart); + HEXSTR(o, 
maxHighMemEnd); JTRACE("Union of memory regions") (o.str()); string minLibsStartStr = jalib::XToString(minLibsStart); string maxLibsEndStr = jalib::XToString(maxLibsEnd); string minHighMemStartStr = jalib::XToString(minHighMemStart); + string maxHighMemEndStr = jalib::XToString(maxHighMemEnd); // Now publish these values to DMTCP ckpt-header. dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str()); + dmtcp_add_to_ckpt_header("MANA_MaxHighMemEnd", maxHighMemEndStr.c_str()); } const char * diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 862ab4f6a..3658919b8 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -568,10 +568,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) // mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs. end1 = rinfo->maxLibsEnd; - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd reserves 8MB above min high memory region. + // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { if (end1 < end2) { end1 = end2; } @@ -649,10 +649,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) Area stack_area; MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1); end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB); - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd reserves 8MB above min high memory region. + // That should include space for stack, argv, env, auxvec. 
start2 = rinfo->minHighMemStart; - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { start2 = 0; @@ -787,10 +787,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) // mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs. end1 = rinfo->maxLibsEnd; - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd reserves 8MB above min high memory region. + // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { if (end1 < end2) { end1 = end2; } @@ -868,10 +868,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) Area stack_area; MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1); end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB); - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd reserves 8MB above min high memory region. + // That should include space for stack, argv, env, auxvec. 
 start2 = rinfo->minHighMemStart; - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { start2 = 0; From d1b5043d55af36cacc274e084f22cc9c476c1498 Mon Sep 17 00:00:00 2001 From: Gene Cooperman Date: Tue, 22 Aug 2023 04:43:24 -0700 Subject: [PATCH 2/2] mtcp_restart_plugin.c: Check if end2 > vvar_area.addr --- restart_plugin/dmtcp_restart_plugin.cpp | 5 +++- restart_plugin/mtcp_restart_plugin.c | 31 +++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/restart_plugin/dmtcp_restart_plugin.cpp b/restart_plugin/dmtcp_restart_plugin.cpp index 422b67f1b..6f6ec4a92 100644 --- a/restart_plugin/dmtcp_restart_plugin.cpp +++ b/restart_plugin/dmtcp_restart_plugin.cpp @@ -53,6 +53,9 @@ void dmtcp_restart_plugin(const string &restartDir, mtcpArgs.push_back((char*) "--minHighMemStart"); mtcpArgs.push_back((char*) kvmap.at("MANA_MinHighMemStart").c_str()); + mtcpArgs.push_back((char*) "--maxHighMemEnd"); + mtcpArgs.push_back((char*) kvmap.at("MANA_MaxHighMemEnd").c_str()); + if (!restartDir.empty()) { mtcpArgs.push_back((char *)"--restartdir"); mtcpArgs.push_back((char *)restartDir.c_str()); @@ -65,4 +68,4 @@ void dmtcp_restart_plugin(const string &restartDir, mtcpArgs.push_back(NULL); execvp(mtcpArgs[0], &mtcpArgs[0]); JASSERT(false)(mtcpArgs[0]).Text("execvp failed!"); -} \ No newline at end of file +} diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 3658919b8..21fae8ef6 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -579,6 +579,12 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + // if (end2 > vvar_area.addr) { + // end2 = vvar_area.addr; + // } + // ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK: // The lower-half stack is present. 
We will restore the upper-half // stack later, while restoring the upper half. We may have an @@ -659,7 +665,14 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } } - // FIXME: End of '#if 1'; Remove '# else' branch when the code is stable. + + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + if (end2 > vvar_area.addr) { + end2 = vvar_area.addr; + } + + // FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable # endif reserveUpperHalfMemoryRegionsForCkptImgs(start1, end1, start2, end2); @@ -798,6 +811,12 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + // if (end2 > vvar_area.addr) { + // end2 = vvar_area.addr; + // } + // ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK: // The lower-half stack is present. We will restore the upper-half // stack later, while restoring the upper half. We may have an @@ -868,6 +887,7 @@ mtcp_plugin_hook(RestoreInfo *rinfo) Area stack_area; MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1); end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB); + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); // maxHighMemEnd reserves 8MB above min high memory region. // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart; @@ -878,7 +898,14 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } } - // FIXME: End of '#if 1'; Remove '# else' branch when the code is stable. + + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + if (end2 > vvar_area.addr) { + end2 = vvar_area.addr; + } + + // FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable # endif char full_filename[PATH_MAX];