diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index cae705c15..2575e0704 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -699,7 +699,9 @@ computeUnionOfCkptImageAddresses() void *libsEnd = NULL; void *maxLibsEnd = NULL; void *highMemStart = 0x0; - void *minHighMemStart = NULL; + void *minHighMemStart = 0x0; + void *highMemEnd = 0x0; // FIXME: For now, always 8 MB above highMemStart + void *maxHighMemEnd = 0x0; void *lhEnd = 0x0; // lower bound for actual lhEnd void *minAddrBeyondHeap = NULL; void *maxAddrBeyondHeap = NULL; @@ -766,10 +768,16 @@ computeUnionOfCkptImageAddresses() if (prev_addr_end != NULL && prev_addr_end == libsEnd) { highMemStart = area.addr; // This should be the start of the stack. + // Allow 8 MB for stack/argv/auxv + highMemEnd = (char *)highMemStart + 8 * 1024*1024; } if (strcmp(area.name, "[stack]") == 0) { highMemStart = area.addr; // This should be the start of the stack. + // Allow 8 MB for stack/argv/auxv + highMemEnd = (char *)highMemStart + 8 * 1024*1024; + JASSERT(area.endAddr < highMemEnd)((void*)area.addr)((void*)area.endAddr) + (highMemEnd); } // FIXME: We are no longer using min/maxAddrBeyondHeap. 
// Given that heapAddr is poorly defined between launch and restart, @@ -811,11 +819,13 @@ computeUnionOfCkptImageAddresses() string minAddrBeyondHeapStr = jalib::XToString(minAddrBeyondHeap); string maxAddrBeyondHeapStr = jalib::XToString(maxAddrBeyondHeap); string highMemStartStr = jalib::XToString(highMemStart); + string highMemEndStr = jalib::XToString(highMemEnd); kvdb::set(workerPath, "MANA_heapAddr", heapAddrStr); kvdb::set(workerPath, "MANA_libsStart_Orig", origLibsStartStr); kvdb::set(workerPath, "MANA_libsStart", libsStartStr); kvdb::set(workerPath, "MANA_libsEnd", libsEndStr); kvdb::set(workerPath, "MANA_highMemStart", highMemStartStr); + kvdb::set(workerPath, "MANA_highMemEnd", highMemEndStr); kvdb::set(workerPath, "MANA_minAddrBeyondHeap", minAddrBeyondHeapStr); kvdb::set(workerPath, "MANA_maxAddrBeyondHeap", maxAddrBeyondHeapStr); @@ -826,6 +836,8 @@ computeUnionOfCkptImageAddresses() (int64_t)libsEnd) == KVDBResponse::SUCCESS); JASSERT(kvdb::request64(KVDBRequest::MIN, kvdb, "highMemStart", (int64_t)highMemStart) == KVDBResponse::SUCCESS); + JASSERT(kvdb::request64(KVDBRequest::MAX, kvdb, "highMemEnd", + (int64_t)highMemEnd) == KVDBResponse::SUCCESS); dmtcp_global_barrier("MANA_CKPT_UNION"); @@ -835,6 +847,8 @@ computeUnionOfCkptImageAddresses() KVDBResponse::SUCCESS); JASSERT(kvdb::get64(kvdb, "highMemStart", (int64_t *)&minHighMemStart) == KVDBResponse::SUCCESS); + JASSERT(kvdb::get64(kvdb, "highMemEnd", (int64_t *)&maxHighMemEnd) == + KVDBResponse::SUCCESS); ostringstream o; @@ -842,20 +856,24 @@ computeUnionOfCkptImageAddresses() HEXSTR(o, libsStart); HEXSTR(o, libsEnd); HEXSTR(o, highMemStart); + HEXSTR(o, highMemEnd); HEXSTR(o, minLibsStart); HEXSTR(o, maxLibsEnd); HEXSTR(o, minHighMemStart); + HEXSTR(o, maxHighMemEnd); JTRACE("Union of memory regions") (o.str()); string minLibsStartStr = jalib::XToString(minLibsStart); string maxLibsEndStr = jalib::XToString(maxLibsEnd); string minHighMemStartStr = jalib::XToString(minHighMemStart); + string 
maxHighMemEndStr = jalib::XToString(maxHighMemEnd); // Now publish these values to DMTCP ckpt-header. dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str()); dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str()); + dmtcp_add_to_ckpt_header("MANA_MaxHighMemEnd", maxHighMemEndStr.c_str()); } const char * diff --git a/restart_plugin/dmtcp_restart_plugin.cpp b/restart_plugin/dmtcp_restart_plugin.cpp index 422b67f1b..6f6ec4a92 100644 --- a/restart_plugin/dmtcp_restart_plugin.cpp +++ b/restart_plugin/dmtcp_restart_plugin.cpp @@ -53,6 +53,9 @@ void dmtcp_restart_plugin(const string &restartDir, mtcpArgs.push_back((char*) "--minHighMemStart"); mtcpArgs.push_back((char*) kvmap.at("MANA_MinHighMemStart").c_str()); + mtcpArgs.push_back((char*) "--maxHighMemEnd"); + mtcpArgs.push_back((char*) kvmap.at("MANA_MaxHighMemEnd").c_str()); + if (!restartDir.empty()) { mtcpArgs.push_back((char *)"--restartdir"); mtcpArgs.push_back((char *)restartDir.c_str()); @@ -65,4 +68,4 @@ void dmtcp_restart_plugin(const string &restartDir, mtcpArgs.push_back(NULL); execvp(mtcpArgs[0], &mtcpArgs[0]); JASSERT(false)(mtcpArgs[0]).Text("execvp failed!"); -} \ No newline at end of file +} diff --git a/restart_plugin/mtcp_restart_plugin.c b/restart_plugin/mtcp_restart_plugin.c index 862ab4f6a..21fae8ef6 100644 --- a/restart_plugin/mtcp_restart_plugin.c +++ b/restart_plugin/mtcp_restart_plugin.c @@ -568,10 +568,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) // mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs. end1 = rinfo->maxLibsEnd; - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd is the max over all ranks of (highMemStart + 8MB). + // That should include space for stack, argv, env, auxvec. 
start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { if (end1 < end2) { end1 = end2; } @@ -579,6 +579,12 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + // if (end2 > vvar_area.addr) { + // end2 = vvar_area.addr; + // } + // ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK: // The lower-half stack is present. We will restore the upper-half // stack later, while restoring the upper half. We may have an @@ -649,17 +655,24 @@ mtcp_plugin_hook(RestoreInfo *rinfo) Area stack_area; MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1); end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB); - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd is the max over all ranks of (highMemStart + 8MB). + // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart; - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { start2 = 0; end2 = 0; } } - // FIXME: End of '#if 1'; Remove '# else' branch when the code is stable. + + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + if (end2 > vvar_area.addr) { + end2 = vvar_area.addr; + } + + // FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable # endif reserveUpperHalfMemoryRegionsForCkptImgs(start1, end1, start2, end2); @@ -787,10 +800,10 @@ mtcp_plugin_hook(RestoreInfo *rinfo) // mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs. end1 = rinfo->maxLibsEnd; - // Reserve 8MB above min high memory region. 
That should include space for - // stack, argv, env, auxvec. + // maxHighMemEnd is the max over all ranks of (highMemStart + 8MB). + // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { if (end1 < end2) { end1 = end2; } @@ -798,6 +811,12 @@ mtcp_plugin_hook(RestoreInfo *rinfo) end2 = 0; } + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + // if (end2 > vvar_area.addr) { + // end2 = vvar_area.addr; + // } + // ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK: // The lower-half stack is present. We will restore the upper-half // stack later, while restoring the upper half. We may have an @@ -868,17 +887,25 @@ mtcp_plugin_hook(RestoreInfo *rinfo) Area stack_area; MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1); end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB); - // Reserve 8MB above min high memory region. That should include space for - // stack, argv, env, auxvec. + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + // maxHighMemEnd is the max over all ranks of (highMemStart + 8MB). + // That should include space for stack, argv, env, auxvec. start2 = rinfo->minHighMemStart; - end2 = rinfo->minHighMemStart + 8 * MB; + end2 = rinfo->maxHighMemEnd; // Ignore region start2:end2 if it is overlapped with region start1:end1 if (is_overlap(start1, end1, start2, end2)) { start2 = 0; end2 = 0; } } - // FIXME: End of '#if 1'; Remove '# else' branch when the code is stable. + + Area vvar_area; + MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1); + if (end2 > vvar_area.addr) { + end2 = vvar_area.addr; + } + + // FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable # endif char full_filename[PATH_MAX];