Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion mpi-proxy-split/mpi_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,9 @@ computeUnionOfCkptImageAddresses()
void *libsEnd = NULL;
void *maxLibsEnd = NULL;
void *highMemStart = 0x0;
void *minHighMemStart = NULL;
void *minHighMemStart = 0x0;
void *highMemEnd = 0x0; // FIXME: For now, always 8 MB above highMemStart
void *maxHighMemEnd = 0x0;
void *lhEnd = 0x0; // lower bound for actual lhEnd
void *minAddrBeyondHeap = NULL;
void *maxAddrBeyondHeap = NULL;
Expand Down Expand Up @@ -766,10 +768,16 @@ computeUnionOfCkptImageAddresses()
if (prev_addr_end != NULL && prev_addr_end == libsEnd)
{
highMemStart = area.addr; // This should be the start of the stack.
// Allow 8 MB for stack/argv/auxv
highMemEnd = (char *)highMemStart + 8 * 1024*1024;
}
if (strcmp(area.name, "[stack]") == 0)
{
highMemStart = area.addr; // This should be the start of the stack.
// Allow 8 MB for stack/argv/auxv
highMemEnd = (char *)highMemStart + 8 * 1024*1024;
JASSERT(area.endAddr < highMemEnd)((void*)area.addr)((void*)area.endAddr)
(highMemEnd);
}
// FIXME: We are no longer using min/maxAddrBeyondHeap.
// Given that heapAddr is poorly defined between launch and restart,
Expand Down Expand Up @@ -811,11 +819,13 @@ computeUnionOfCkptImageAddresses()
string minAddrBeyondHeapStr = jalib::XToString(minAddrBeyondHeap);
string maxAddrBeyondHeapStr = jalib::XToString(maxAddrBeyondHeap);
string highMemStartStr = jalib::XToString(highMemStart);
string highMemEndStr = jalib::XToString(highMemEnd);
kvdb::set(workerPath, "MANA_heapAddr", heapAddrStr);
kvdb::set(workerPath, "MANA_libsStart_Orig", origLibsStartStr);
kvdb::set(workerPath, "MANA_libsStart", libsStartStr);
kvdb::set(workerPath, "MANA_libsEnd", libsEndStr);
kvdb::set(workerPath, "MANA_highMemStart", highMemStartStr);
kvdb::set(workerPath, "MANA_highMemEnd", highMemEndStr);
kvdb::set(workerPath, "MANA_minAddrBeyondHeap", minAddrBeyondHeapStr);
kvdb::set(workerPath, "MANA_maxAddrBeyondHeap", maxAddrBeyondHeapStr);

Expand All @@ -826,6 +836,8 @@ computeUnionOfCkptImageAddresses()
(int64_t)libsEnd) == KVDBResponse::SUCCESS);
JASSERT(kvdb::request64(KVDBRequest::MIN, kvdb, "highMemStart",
(int64_t)highMemStart) == KVDBResponse::SUCCESS);
JASSERT(kvdb::request64(KVDBRequest::MAX, kvdb, "highMemEnd",
(int64_t)highMemEnd) == KVDBResponse::SUCCESS);

dmtcp_global_barrier("MANA_CKPT_UNION");

Expand All @@ -835,27 +847,33 @@ computeUnionOfCkptImageAddresses()
KVDBResponse::SUCCESS);
JASSERT(kvdb::get64(kvdb, "highMemStart", (int64_t *)&minHighMemStart) ==
KVDBResponse::SUCCESS);
JASSERT(kvdb::get64(kvdb, "highMemEnd", (int64_t *)&maxHighMemEnd) ==
KVDBResponse::SUCCESS);

ostringstream o;

#define HEXSTR(o, x) o << #x << std::hex << x;
HEXSTR(o, libsStart);
HEXSTR(o, libsEnd);
HEXSTR(o, highMemStart);
HEXSTR(o, highMemEnd);
HEXSTR(o, minLibsStart);
HEXSTR(o, maxLibsEnd);
HEXSTR(o, minHighMemStart);
HEXSTR(o, maxHighMemEnd);

JTRACE("Union of memory regions") (o.str());

string minLibsStartStr = jalib::XToString(minLibsStart);
string maxLibsEndStr = jalib::XToString(maxLibsEnd);
string minHighMemStartStr = jalib::XToString(minHighMemStart);
string maxHighMemEndStr = jalib::XToString(maxHighMemEnd);

// Now publish these values to DMTCP ckpt-header.
dmtcp_add_to_ckpt_header("MANA_MinLibsStart", minLibsStartStr.c_str());
dmtcp_add_to_ckpt_header("MANA_MaxLibsEnd", maxLibsEndStr.c_str());
dmtcp_add_to_ckpt_header("MANA_MinHighMemStart", minHighMemStartStr.c_str());
dmtcp_add_to_ckpt_header("MANA_MaxHighMemEnd", maxHighMemEndStr.c_str());
}

const char *
Expand Down
5 changes: 4 additions & 1 deletion restart_plugin/dmtcp_restart_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ void dmtcp_restart_plugin(const string &restartDir,
mtcpArgs.push_back((char*) "--minHighMemStart");
mtcpArgs.push_back((char*) kvmap.at("MANA_MinHighMemStart").c_str());

mtcpArgs.push_back((char*) "--maxHighMemEnd");
mtcpArgs.push_back((char*) kvmap.at("MANA_MaxHighMemEnd").c_str());

if (!restartDir.empty()) {
mtcpArgs.push_back((char *)"--restartdir");
mtcpArgs.push_back((char *)restartDir.c_str());
Expand All @@ -65,4 +68,4 @@ void dmtcp_restart_plugin(const string &restartDir,
mtcpArgs.push_back(NULL);
execvp(mtcpArgs[0], &mtcpArgs[0]);
JASSERT(false)(mtcpArgs[0]).Text("execvp failed!");
}
}
55 changes: 41 additions & 14 deletions restart_plugin/mtcp_restart_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -568,17 +568,23 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
// mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs.
end1 = rinfo->maxLibsEnd;

// Reserve 8MB above min high memory region. That should include space for
// stack, argv, env, auxvec.
// maxHighMemEnd is the largest highMemEnd recorded at checkpoint time,
// where each highMemEnd is its highMemStart + 8MB.
// That should include space for stack, argv, env, auxvec.
start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow
end2 = rinfo->minHighMemStart + 8 * MB;
end2 = rinfo->maxHighMemEnd;
// Ignore region start2:end2 if it is overlapped with region start1:end1
if (is_overlap(start1, end1, start2, end2)) {
if (end1 < end2) { end1 = end2; }
start2 = 0;
end2 = 0;
}

Area vvar_area;
MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1);
// if (end2 > vvar_area.addr) {
// end2 = vvar_area.addr;
// }

// ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK:
// The lower-half stack is present. We will restore the upper-half
// stack later, while restoring the upper half. We may have an
Expand Down Expand Up @@ -649,17 +655,24 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
Area stack_area;
MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1);
end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB);
// Reserve 8MB above min high memory region. That should include space for
// stack, argv, env, auxvec.
// maxHighMemEnd is the largest highMemEnd recorded at checkpoint time,
// where each highMemEnd is its highMemStart + 8MB.
// That should include space for stack, argv, env, auxvec.
start2 = rinfo->minHighMemStart;
end2 = rinfo->minHighMemStart + 8 * MB;
end2 = rinfo->maxHighMemEnd;
// Ignore region start2:end2 if it is overlapped with region start1:end1
if (is_overlap(start1, end1, start2, end2)) {
start2 = 0;
end2 = 0;
}
}
// FIXME: End of '#if 1'; Remove '# else' branch when the code is stable.

Area vvar_area;
MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1);
if (end2 > vvar_area.addr) {
end2 = vvar_area.addr;
}

// FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable
# endif

reserveUpperHalfMemoryRegionsForCkptImgs(start1, end1, start2, end2);
Expand Down Expand Up @@ -787,17 +800,23 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
// mtcp_split_process.c, in both restart_plugin and mpi-proxy-split dirs.
end1 = rinfo->maxLibsEnd;

// Reserve 8MB above min high memory region. That should include space for
// stack, argv, env, auxvec.
// maxHighMemEnd is the largest highMemEnd recorded at checkpoint time,
// where each highMemEnd is its highMemStart + 8MB.
// That should include space for stack, argv, env, auxvec.
start2 = rinfo->minHighMemStart - 1 * GB; // Allow for stack to grow
end2 = rinfo->minHighMemStart + 8 * MB;
end2 = rinfo->maxHighMemEnd;
// Ignore region start2:end2 if it is overlapped with region start1:end1
if (is_overlap(start1, end1, start2, end2)) {
if (end1 < end2) { end1 = end2; }
start2 = 0;
end2 = 0;
}

Area vvar_area;
MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1);
// if (end2 > vvar_area.addr) {
// end2 = vvar_area.addr;
// }

// ADJUST THE [start2, end2] AROUND THE LOWER-HALF STACK:
// The lower-half stack is present. We will restore the upper-half
// stack later, while restoring the upper half. We may have an
Expand Down Expand Up @@ -868,17 +887,25 @@ mtcp_plugin_hook(RestoreInfo *rinfo)
Area stack_area;
MTCP_ASSERT(getMappedArea(&stack_area, "[stack]") == 1);
end1 = MIN(stack_area.endAddr - 4 * GB, rinfo->minHighMemStart - 4 * GB);
// Reserve 8MB above min high memory region. That should include space for
// stack, argv, env, auxvec.
MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1);
// maxHighMemEnd is the largest highMemEnd recorded at checkpoint time,
// where each highMemEnd is its highMemStart + 8MB.
// That should include space for stack, argv, env, auxvec.
start2 = rinfo->minHighMemStart;
end2 = rinfo->minHighMemStart + 8 * MB;
end2 = rinfo->maxHighMemEnd;
// Ignore region start2:end2 if it is overlapped with region start1:end1
if (is_overlap(start1, end1, start2, end2)) {
start2 = 0;
end2 = 0;
}
}
// FIXME: End of '#if 1'; Remove '# else' branch when the code is stable.

Area vvar_area;
MTCP_ASSERT(getMappedArea(&vvar_area, "[vvar]") == 1);
if (end2 > vvar_area.addr) {
end2 = vvar_area.addr;
}

// FIXME: End of '#if 1'; Remove this '# else' branch when the code is stable
# endif

char full_filename[PATH_MAX];
Expand Down