Skip to content

Commit

Permalink
Add ability for Ariel to trace MPI applications
Browse files Browse the repository at this point in the history
  • Loading branch information
Patrick Lavin authored and plavin committed Jul 19, 2024
1 parent d6244da commit e5f33b5
Show file tree
Hide file tree
Showing 28 changed files with 1,712 additions and 55 deletions.
3 changes: 3 additions & 0 deletions src/sst/elements/ariel/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ EXTRA_DIST = \
frontend/simple/examples/stream/tests/refFiles/test_Ariel_runstreamNB.out \
frontend/simple/examples/stream/tests/refFiles/test_Ariel_runstreamSt.out \
tests/testsuite_default_Ariel.py \
tests/testsuite_testio_Ariel.py \
tests/testsuite_mpi_Ariel.py \
tests/testopenMP/ompmybarrier/ompmybarrier.c \
tests/testopenMP/ompmybarrier/Makefile

Expand Down Expand Up @@ -104,6 +106,7 @@ libariel_la_CPPFLAGS = \
-DARIEL_TRACE_LIB=$(libdir)/sst \
-DARIEL_TOOL_DIR="$(libexecdir)" \
-DPINTOOL_EXECUTABLE="$(PINTOOL_RUNTIME)" \
-DMPILAUNCHER_EXECUTABLE="$(CURDIR)/mpi/mpilauncher" \
$(AM_CPPFLAGS) $(CPPFLAGS)

sstdir = $(includedir)/sst/elements/ariel
Expand Down
4 changes: 2 additions & 2 deletions src/sst/elements/ariel/api/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
all: libarielapi.so libarielapi.a

libarielapi.so: arielapi.c arielapi.h
$(CC) -fPIC -shared -o libarielapi.so arielapi.c
$(CC) -fPIC -shared -o libarielapi.so arielapi.c -fopenmp

arielapi.o: arielapi.c arielapi.h
$(CC) -c -o arielapi.o arielapi.c
$(CC) -c -o arielapi.o arielapi.c -fopenmp

libarielapi.a: arielapi.o
ar rcs $@ $^
Expand Down
66 changes: 66 additions & 0 deletions src/sst/elements/ariel/api/arielapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,23 @@

#include "arielapi.h"
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#if __has_include(<mpi.h>)
#include <mpi.h>
#define HAVE_MPI_H
#endif

/* These definitions are replaced during simulation */

void ariel_enable() {
printf("ARIEL: ENABLE called in Ariel API.\n");
}

void ariel_disable() {
printf("ARIEL: DISABLE called in Ariel API.\n");
}

void ariel_fence() {
printf("ARIEL: FENCE called in Ariel API.\n");
}
Expand All @@ -38,3 +47,60 @@ void ariel_output_stats() {
void ariel_malloc_flag(int64_t id, int count, int level) {
printf("ARIEL: flagging next %d mallocs at id %" PRId64 "\n", count, id);
}

// To ensure that the Pintool (fesimple.cc) numbers our application's OpenMP threads
// from 0..N-1, we need to run an OpenMP parallel region before calling MPI Init.
// Otherwise, some MPI threads which aren't used for our application will be
// numbered 1 and 2.
void omp_parallel_region() {
volatile int x = 0;
#if defined(_OPENMP)
#pragma omp parallel
{
#pragma omp critical
{
x += 1;
}
}
#else
printf("ERROR: libarielapi.c: libarielapi was compiled without OpenMP enabled\n");
exit(1);
#endif
}

// This function only exists to get mapped by the frontend. It should only be called
// from MPI_Init or MPI_Init_thread to allow the frontend to distinguish between our
// custom versions of of those functions and the normal MPI library's versions.
int _api_mpi_init() {
printf("notifying fesimple\n");
}

// Custom version of MPI_Init. We override the normal version in order to call an
// OpenMP parallel region to ensure threads are numbered properly by the frontend.
int MPI_Init(int *argc, char ***argv) {
#ifdef HAVE_MPI_H
// Communicate to the frontend that we have replaced the nomal MPI_Init with
// the one in the Ariel API
_api_mpi_init();
omp_parallel_region();
return PMPI_Init(argc, argv);
#else
printf("Error: arielapi.c: MPI_Init called in arielapi.c but this file was compiled without MPI. Please recompile the API with `CC=mpicc make`.\n");
exit(1);
#endif
}

// Custom version of MPI_Init_thread. We override the normal verison in order to call an
// OpenMP parallel region to ensure threads are numbered properly by the frontend.
int MPI_Init_thread(int *argc, char ***argv, int required, int *provided) {
#ifdef HAVE_MPI_H
// Communicate to the frontend that we have replaced the nomal MPI_Init_thread with
// the one in the Ariel API
_api_mpi_init();
omp_parallel_region();
return PMPI_Init_thread(argc, argv, required, provided);
#else
printf("Error: arielapi.c: MPI_Init_thread called in arielapi.c but this file was compiled without MPI. Please recompile the API with `CC=mpicc make`.\n");
exit(1);
#endif
}
5 changes: 5 additions & 0 deletions src/sst/elements/ariel/api/arielapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ extern "C" {
*/
void ariel_enable();

/* Disable simulation when this fucntion is encountered.
* Works regardless of the 'arielmode' parameter.
*/
void ariel_disable();

/* Execute a fence */
void ariel_fence();

Expand Down
4 changes: 4 additions & 0 deletions src/sst/elements/ariel/arielcpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ class ArielCPU : public SST::Component {
{"appstderrappend", "If appstderr is set, set this to 1 to append the file intead of overwriting", "0"},
{"launchparamcount", "Number of parameters supplied for the launch tool", "0" },
{"launchparam%(launchparamcount)d", "Set the parameter to the launcher", "" },
{"mpimode", "Whether to use <mpilauncher> to to launch <launcher> in order to trace MPI-enabled applications.", "0"},
{"mpilauncher", "Specify a launcher to be used for MPI executables in conjuction with <launcher>", STRINGIZE(MPILAUNCHER_EXECUTABLE)},
{"mpiranks", "Number of ranks to be launched by <mpilauncher>. Only <mpitracerank> will be traced by <launcher>.", "1" },
{"mpitracerank", "Rank to be traced by <launcher>.", "0" },
{"envparamcount", "Number of environment parameters to supply to the Ariel executable, default=-1 (use SST environment)", "-1"},
{"envparamname%(envparamcount)d", "Sets the environment parameter name", ""},
{"envparamval%(envparamcount)d", "Sets the environment parameter value", ""},
Expand Down
114 changes: 111 additions & 3 deletions src/sst/elements/ariel/frontend/pin3/fesimple.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@
#include "builtin_types.h"
#endif

#if __has_include(<mpi.h>)
#include <mpi.h>
#define HAVE_MPI_H
#endif


// TODO add check for PinCRT compatible libz and try to pick that up
/*#ifdef HAVE_PINCRT_LIBZ
Expand Down Expand Up @@ -130,11 +136,15 @@ GpuDataTunnel *tunnelD = NULL;

// Time function interception
struct timeval offset_tv;
int timer_initialized = 0;
#if !defined(__APPLE__)
struct timespec offset_tp_mono;
struct timespec offset_tp_real;
#endif

// MPI
int api_mpi_init_used = 0;

/****************************************************************/
/********************** SHADOW STACK ****************************/
/* Used by 'sieve' to associate mallocs to the code they */
Expand Down Expand Up @@ -367,6 +377,7 @@ VOID WriteInstructionRead(ADDRINT* address, UINT32 readSize, THREADID thr, ADDRI
ac.inst.instClass = instClass;
ac.inst.simdElemCount = simdOpWidth;

//printf("fesimple.cc: patrick: writing end instuction marker\n");
tunnel->writeMessage(thr, ac);
}

Expand Down Expand Up @@ -409,6 +420,7 @@ VOID WriteStartInstructionMarker(UINT32 thr, ADDRINT ip, UINT32 instClass, UINT3
ac.instPtr = (uint64_t) ip;
ac.inst.simdElemCount = simdOpWidth;
ac.inst.instClass = instClass;
//printf("fesimple.cc: patrick: writing start instuction marker\n");
tunnel->writeMessage(thr, ac);
}

Expand All @@ -417,6 +429,7 @@ VOID WriteEndInstructionMarker(UINT32 thr, ADDRINT ip)
ArielCommand ac;
ac.command = ARIEL_END_INSTRUCTION;
ac.instPtr = (uint64_t) ip;
//printf("fesimple.cc: patrick: writing end instuction marker\n");
tunnel->writeMessage(thr, ac);
}

Expand Down Expand Up @@ -664,6 +677,57 @@ void mapped_ariel_enable()
return;
}

// Setup timers to count start time + elapsed simulated time
// Only do this the first time Ariel is enabled
if (!timer_initialized) {
timer_initialized = 1;
struct timeval tvsim;
gettimeofday(&offset_tv, NULL);
tunnel->getTime(&tvsim);
offset_tv.tv_sec -= tvsim.tv_sec;
offset_tv.tv_usec -= tvsim.tv_usec;
#if ! defined(__APPLE__)
struct timespec tpsim;
tunnel->getTimeNs(&tpsim);
offset_tp_mono.tv_sec = tvsim.tv_sec - tpsim.tv_sec;
offset_tp_mono.tv_nsec = (tvsim.tv_usec * 1000) - tpsim.tv_nsec;
offset_tp_real.tv_sec = tvsim.tv_sec - tpsim.tv_sec;
offset_tp_real.tv_nsec = (tvsim.tv_usec * 1000) - tpsim.tv_nsec;
#endif
/* ENABLE */
}
enable_output = true;

/* UNLOCK */
PIN_ReleaseLock(&mainLock);

fprintf(stderr, "ARIEL: Enabling memory and instruction tracing from program control at simulated Ariel cycle %" PRIu64 ".\n",
tunnel->getCycles());
fflush(stdout);
fflush(stderr);
}

/* Intercept ariel_disable() in application & start simulating instructions */
void mapped_ariel_disable()
{

// Note
// By adding clock offset calculation, this function now has visible side-effects when called more than once
// In most cases won't matter -> ariel_disable() called once or close together in time so offsets will stabilize quickly
// In some cases could cause a big jump in time in the middle of simulation -> ariel_disable() left in app but mode is always-on
// So, update ariel_disable & offsets in lock & don't update if already enabled

/* LOCK */
THREADID thr = PIN_ThreadId();
PIN_GetLock(&mainLock, thr);

if (!enable_output) {
PIN_ReleaseLock(&mainLock);
return;
}

// TODO: I just copied these timers from enable. Need to figure out what to do with them here
// so that we can re-enable properly later.
// Setup timers to count start time + elapsed simulated time
struct timeval tvsim;
gettimeofday(&offset_tv, NULL);
Expand All @@ -678,13 +742,13 @@ void mapped_ariel_enable()
offset_tp_real.tv_sec = tvsim.tv_sec - tpsim.tv_sec;
offset_tp_real.tv_nsec = (tvsim.tv_usec * 1000) - tpsim.tv_nsec;
#endif
/* ENABLE */
enable_output = true;
/* DISABLE */
enable_output = false;

/* UNLOCK */
PIN_ReleaseLock(&mainLock);

fprintf(stderr, "ARIEL: Enabling memory and instruction tracing from program control at simulated Ariel cycle %" PRIu64 ".\n",
fprintf(stderr, "ARIEL: Disabling memory and instruction tracing from program control at simulated Ariel cycle %" PRIu64 ".\n",
tunnel->getCycles());
fflush(stdout);
fflush(stderr);
Expand Down Expand Up @@ -787,6 +851,18 @@ void mapped_ariel_fence(void *virtualAddress)
WriteFenceInstructionMarker(thr, ip);
}

void mapped_api_mpi_init() {
api_mpi_init_used = 1;
}

int check_for_api_mpi_init() {
if (!api_mpi_init_used && !getenv("ARIEL_DISABLE_MPI_INIT_CHECK")) {
fprintf(stderr, "Error: fesimple.cc: The Ariel API verion of MPI_Init_{thread} was not used, which can result in errors when used in conjunction with OpenMP. Please link against the Ariel API (included in this distribution at src/sst/elements/ariel/api) or disable this message by setting the environment variable `ARIEL_DISABLE_MPI_INIT_CHECK`\n");
exit(1);
}
return 0;
}

int ariel_mlm_memcpy(void* dest, void* source, size_t size) {
#ifdef ARIEL_DEBUG
fprintf(stderr, "Perform a mlm_memcpy from Ariel from %p to %p length %llu\n",
Expand Down Expand Up @@ -1664,6 +1740,11 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
enable_output = false;
}
return;
} else if (RTN_Name(rtn) == "ariel_disable" || RTN_Name(rtn) == "_ariel_disable" || RTN_Name(rtn) == "__arielfort_MOD_ariel_disable") {
fprintf(stderr,"Identified routine: ariel_disable, replacing with Ariel equivalent...\n");
RTN_Replace(rtn, (AFUNPTR) mapped_ariel_disable);
fprintf(stderr,"Replacement complete.\n");
return;
} else if (RTN_Name(rtn) == "gettimeofday" || RTN_Name(rtn) == "_gettimeofday") {
fprintf(stderr,"Identified routine: gettimeofday, replacing with Ariel equivalent...\n");
RTN_Replace(rtn, (AFUNPTR) mapped_gettimeofday);
Expand All @@ -1674,6 +1755,24 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
RTN_Replace(rtn, (AFUNPTR) mapped_ariel_cycles);
fprintf(stderr, "Replacement complete\n");
return;
} else if (RTN_Name(rtn) == "MPI_Init" || RTN_Name(rtn) == "_MPI_Init") {
fprintf(stderr, "Identified routine: MPI_Init. Instrumenting.\n");
RTN_Open(rtn);
RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR) check_for_api_mpi_init, IARG_END);
RTN_Close(rtn);
fprintf(stderr, "Instrumentation complete\n");
} else if (RTN_Name(rtn) == "MPI_Init_thread" || RTN_Name(rtn) == "_MPI_Init_thread") {
fprintf(stderr, "Identified routine: MPI_Init_thread. Instrumenting.\n");
RTN_Open(rtn);
RTN_InsertCall(rtn, IPOINT_AFTER, (AFUNPTR) check_for_api_mpi_init, IARG_END);
RTN_Close(rtn);
fprintf(stderr, "Instrumentation complete\n");
} else if (RTN_Name(rtn) == "api_mpi_init" || RTN_Name(rtn) == "_api_mpi_init") {
fprintf(stderr, "Replacing api_mpi_init with mapped_api_mpi_init.\n");
RTN_Replace(rtn, (AFUNPTR) mapped_api_mpi_init);
fprintf(stderr, "Replacement complete\n");
return;
return;
#if ! defined(__APPLE__)
} else if (RTN_Name(rtn) == "clock_gettime" || RTN_Name(rtn) == "_clock_gettime" ||
RTN_Name(rtn) == "__clock_gettime") {
Expand Down Expand Up @@ -1836,6 +1935,11 @@ VOID InstrumentRoutine(RTN rtn, VOID* args)
}
}

void fork_disable_child_output(THREADID threadid, const CONTEXT *ctx, VOID *v) {
fprintf(stderr, "Warning: fesimple cannot trace forked processes. Disabling Pin for pid %d\n", getpid());
PIN_Detach();
}

void loadFastMemLocations()
{
std::ifstream infile(UseMallocMap.Value().c_str());
Expand Down Expand Up @@ -1913,6 +2017,7 @@ int main(int argc, char *argv[])
// Pin version specific tunnel attach
tunnelmgr = new SST::Core::Interprocess::MMAPChild_Pin3<ArielTunnel>(SSTNamedPipe.Value());
tunnel = tunnelmgr->getTunnel();
//printf("fesimple.cc: patrick : got tunnel, %s\n", SSTNamedPipe.Value().c_str());
#ifdef HAVE_CUDA
tunnelRmgr = new SST::Core::Interprocess::MMAPChild_Pin3<GpuReturnTunnel>(SSTNamedPipe2.Value());
tunnelDmgr = new SST::Core::Interprocess::MMAPChild_Pin3<GpuDataTunnel>(SSTNamedPipe3.Value());
Expand Down Expand Up @@ -1998,6 +2103,9 @@ int main(int argc, char *argv[])
}
}

// Fork callback
PIN_AddForkFunction(FPOINT_AFTER_IN_CHILD, (FORK_CALLBACK) fork_disable_child_output, NULL);

fprintf(stderr, "ARIEL: Starting program.\n");
fflush(stdout);
PIN_StartProgram();
Expand Down
Loading

0 comments on commit e5f33b5

Please sign in to comment.