From 1d639b30f7b53171115c45648dec52b5076f04c4 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 17:01:55 -0400 Subject: [PATCH 01/49] initial virtual-id refactoring --- mpi-proxy-split/Makefile | 2 +- .../mpi-wrappers/mpi_file_wrappers.cpp | 10 +- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 2 + mpi-proxy-split/virtual-ids.cpp | 179 +++++++ mpi-proxy-split/virtual-ids.h | 503 ++++++++++-------- 5 files changed, 468 insertions(+), 228 deletions(-) create mode 100644 mpi-proxy-split/virtual-ids.cpp diff --git a/mpi-proxy-split/Makefile b/mpi-proxy-split/Makefile index 345303ce4..50d442b4a 100644 --- a/mpi-proxy-split/Makefile +++ b/mpi-proxy-split/Makefile @@ -30,7 +30,7 @@ WRAPPERS_SRCDIR=mpi-wrappers # As you add new files to your plugin library, add the object file names here. LIBOBJS = mpi_plugin.o p2p_drain_send_recv.o p2p_log_replay.o \ - record-replay.o seq_num.o \ + record-replay.o seq_num.o virtual-ids.o \ split_process.o ${LOWER_HALF_SRCDIR}/procmapsutils.o #MANA_COORD_OBJS = mana_coordinator.o diff --git a/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp index f3ff2abac..7089c2c71 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp @@ -150,6 +150,10 @@ USER_DEFINED_WRAPPER(int, File_set_view, (MPI_File) fh, (MPI_Offset) disp, return retval; } +// FIXME: This is the only wrapper function in the entirety of MANA that uses +// REAL_TO_VIRTUAL. O(1) real-to-virtual translation was a goal of the vid +// refactoring, but we need to think more carefully about this. Maybe +// REAL_TO_VIRTUAL could be eliminated, if it's only used once. USER_DEFINED_WRAPPER(int, File_get_view, (MPI_File) fh, (MPI_Offset*) disp, (MPI_Datatype*) etype, (MPI_Datatype*) filetype, (char*) datarep) @@ -163,8 +167,8 @@ USER_DEFINED_WRAPPER(int, File_get_view, (MPI_File) fh, (MPI_Offset*) disp, retval = NEXT_FUNC(File_get_view)(realFile, disp, &realEtype, &realFtype, datarep); RETURN_TO_UPPER_HALF(); - *etype = REAL_TO_VIRTUAL_TYPE(realEtype); - *filetype = REAL_TO_VIRTUAL_TYPE(realFtype); + // *etype = REAL_TO_VIRTUAL_TYPE(realEtype); + // *filetype = REAL_TO_VIRTUAL_TYPE(realFtype); DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } @@ -442,4 +446,4 @@ PMPI_IMPL(int, MPI_File_set_errhandler, MPI_File file, MPI_Errhandler errhandler) PMPI_IMPL(int, MPI_File_get_errhandler, MPI_File file, MPI_Errhandler *errhandler) -PMPI_IMPL(int, MPI_File_delete, const char *filename, MPI_Info info) \ No newline at end of file +PMPI_IMPL(int, MPI_File_delete, const char *filename, MPI_Info info) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index 342e8e8d3..2061f0c22 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -74,6 +74,7 @@ USER_DEFINED_WRAPPER(int, Init, (int *) argc, (char ***) argv) { recordPostMpiInitMaps(); + init_comm_world(); g_world_comm = ADD_NEW_COMM(g_world_comm); LOG_CALL(restoreComms, Comm_dup, MPI_COMM_WORLD, g_world_comm); initialize_drain_send_recv(); @@ -99,6 +100,7 @@ USER_DEFINED_WRAPPER(int, Init_thread, (int *) argc, (char ***) argv, recordPostMpiInitMaps(); + init_comm_world(); g_world_comm = ADD_NEW_COMM(g_world_comm); LOG_CALL(restoreComms, Comm_dup, MPI_COMM_WORLD, g_world_comm); initialize_drain_send_recv(); diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp new file mode 100644 index 000000000..6e5acad8a --- /dev/null +++ b/mpi-proxy-split/virtual-ids.cpp @@ -0,0 +1,179 @@ +/**************************************************************************** + * Copyright (C) 2019-2021 by Gene Cooperman, Rohan Garg, Yao Xu * + * gene@ccs.neu.edu, rohgarg@ccs.neu.edu, xu.yao1@northeastern.edu * + * * + * Edited 2023 by Leonid Belyaev * + * belyaev.l@northeastern.edu * + * * + * This file is part of DMTCP. * + * * + * DMTCP is free software: you can redistribute it and/or * + * modify it under the terms of the GNU Lesser General Public License as * + * published by the Free Software Foundation, either version 3 of the * + * License, or (at your option) any later version. * + * * + * DMTCP is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public * + * License in the files COPYING and COPYING.LESSER. If not, see * + * . * + ****************************************************************************/ +#include +#include + +#include +#include + +#include "dmtcp.h" + +#include "jassert.h" +#include "jconvert.h" +#include "split_process.h" +#include "lower_half_api.h" +#include "virtual-ids.h" +#include "mpi_nextfunc.h" + +#define MAX_VIRTUAL_ID 999 + +// #define DEBUG_VIDS + +// TODO I use an explicitly integer virtual id, which under macro +// reinterpretation will fit into an int64 pointer. This should be +// fine if no real MPI Type is smaller than an int32. +typedef typename std::map::iterator id_desc_iterator; + +// Per Yao Xu, MANA does not require the thread safety offered by +// DMTCP's VirtualIdTable. We use std::map. + +// int vId -> id_desc_t*, which contains rId. +std::map idDescriptorTable; + +// dead-simple vid generation mechanism. +int base = 1; +int nextvId = base; + +// This is a descriptor initializer. Its job is to write an initial +// descriptor for a real MPI Communicator. + +comm_desc_t* init_comm_desc_t(MPI_Comm realComm) { + comm_desc_t* desc = ((comm_desc_t*)malloc(sizeof(comm_desc_t))); + desc->real_id = realComm; + desc->ranks = NULL; + return desc; +} + +void destroy_comm_desc_t(comm_desc_t* desc) { + free(desc->ranks); + free(desc); +} + +group_desc_t* init_group_desc_t(MPI_Group realGroup) { + group_desc_t* desc = ((group_desc_t*)malloc(sizeof(group_desc_t))); + desc->real_id = realGroup; + desc->ranks = NULL; + desc->size = 0; + return desc; +} + +void destroy_group_desc_t(group_desc_t* group) { + free(group->ranks); + free(group); +} + +request_desc_t* init_request_desc_t(MPI_Request realReq) { + request_desc_t* desc = ((request_desc_t*)malloc(sizeof(request_desc_t))); // FIXME + desc->real_id = realReq; + return desc; +} + +void destroy_request_desc_t(request_desc_t* request) { + free(request); +} + +op_desc_t* init_op_desc_t(MPI_Op realOp) { + op_desc_t* desc = ((op_desc_t*)malloc(sizeof(op_desc_t))); + desc->real_id = realOp; + desc->user_fn = NULL; + return desc; +} + +void destroy_op_desc_t(op_desc_t* op) { + free(op); +} + +datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType) { + datatype_desc_t* desc = ((datatype_desc_t*)malloc(sizeof(datatype_desc_t))); + desc->real_id = realType; + + desc->num_integers = 0; + desc->integers = NULL; + + desc->num_addresses = 0; + desc->addresses = NULL; + + // TODO this was in Yao's spec sheet, but I haven't seen it used + // in any of the relevant functions. Why is this here? + desc->num_large_counts = 0; + desc->large_counts = NULL; + + desc->num_datatypes = 0; + desc->datatypes = NULL; + + desc->combiner = 0; + desc->is_freed = false; + return desc; +} + +void destroy_datatype_desc_t(datatype_desc_t* datatype) { + free(datatype->integers); + free(datatype->addresses); + free(datatype->large_counts); + free(datatype->datatypes); + free(datatype); +} + +file_desc_t* init_file_desc_t(MPI_File realFile) { + file_desc_t* desc = ((file_desc_t*)malloc(sizeof(file_desc_t))); + desc->real_id = realFile; + return desc; +} + +void destroy_file_desc_t(file_desc_t* file) { + free(file); +} + +void print_id_descriptors() { + printf("Printing %u id_descriptors:\n", idDescriptorTable.size()); + fflush(stdout); + for (id_desc_pair pair : idDescriptorTable) { + printf("%x\n", pair.first); + fflush(stdout); + } +} + +// Given int virtualid, return the contained id_desc_t if it exists. +// Otherwise return NULL +id_desc_t* virtualToDescriptor(int virtId) { +#ifdef DEBUG_VIDS + // print_id_descriptors(); +#endif + id_desc_iterator it = idDescriptorTable.find(virtId); + if (it != idDescriptorTable.end()) { + return it->second; + } + return NULL; +} + +// We need to preemptively virtualize MPI_COMM_WORLD. +void init_comm_world() { + comm_desc_t* comm_world = ((comm_desc_t*)malloc(sizeof(comm_desc_t))); + + // FIXME: This WILL NOT WORK when moving to OpenMPI, ExaMPI, as written. + // Yao, Twinkle, should apply their lh_constants_map strategy here. + comm_world->real_id = 0x84000000; + + idDescriptorTable[MPI_COMM_WORLD] = ((union id_desc_t*)comm_world); +} diff --git a/mpi-proxy-split/virtual-ids.h b/mpi-proxy-split/virtual-ids.h index 3f5befa93..4c7e39a0d 100644 --- a/mpi-proxy-split/virtual-ids.h +++ b/mpi-proxy-split/virtual-ids.h @@ -1,44 +1,35 @@ -/**************************************************************************** - * Copyright (C) 2019-2021 by Gene Cooperman, Rohan Garg, Yao Xu * - * gene@ccs.neu.edu, rohgarg@ccs.neu.edu, xu.yao1@northeastern.edu * - * * - * This file is part of DMTCP. * - * * - * DMTCP is free software: you can redistribute it and/or * - * modify it under the terms of the GNU Lesser General Public License as * - * published by the Free Software Foundation, either version 3 of the * - * License, or (at your option) any later version. * - * * - * DMTCP is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU Lesser General Public License for more details. * - * * - * You should have received a copy of the GNU Lesser General Public * - * License in the files COPYING and COPYING.LESSER. If not, see * - * . * - ****************************************************************************/ - #pragma once #ifndef MPI_VIRTUAL_IDS_H #define MPI_VIRTUAL_IDS_H #include - #include "virtualidtable.h" #include "jassert.h" #include "jconvert.h" #include "split_process.h" #include "dmtcp.h" -// Convenience macros -#define MpiCommList dmtcp_mpi::MpiVirtualization -#define MpiGroupList dmtcp_mpi::MpiVirtualization -#define MpiTypeList dmtcp_mpi::MpiVirtualization -#define MpiOpList dmtcp_mpi::MpiVirtualization -#define MpiFileList dmtcp_mpi::MpiVirtualization -#define MpiCommKeyvalList dmtcp_mpi::MpiVirtualization -#define MpiRequestList dmtcp_mpi::MpiVirtualization +#define CONCAT(a,b) a ## b + +// num - type - VID MASK +// 0 - undefined - 0x00000000 +// 1 - communicator - 0x01000000 +// 2 - group - 0x02000000 +// 3 - request - 0x03000000 +// 4 - op - 0x04000000 +// 5 - datatype - 0x05000000 +// 6 - file - 0x06000000 +// 7 - comm_keyval - 0x07000000 + +#define UNDEFINED_MASK 0x00000000 +#define COMM_MASK 0x01000000 +#define GROUP_MASK 0x02000000 +#define REQUEST_MASK 0x03000000 +#define OP_MASK 0x04000000 +#define DATATYPE_MASK 0x05000000 +#define FILE_MASK 0x06000000 +#define COMM_KEYVAL_MASK 0x07000000 + #ifndef NEXT_FUNC # define NEXT_FUNC(func) \ ({ \ @@ -50,243 +41,305 @@ _real_MPI_ ## func; \ }) #endif // ifndef NEXT_FUNC -#define REAL_TO_VIRTUAL_FILE(id) \ - MpiFileList::instance("MpiFile", MPI_FILE_NULL).realToVirtual(id) + + +#define DESC_TO_VIRTUAL(desc, null, real_type) \ + ({ \ + real_type _DTV_vId = (desc == NULL) ? null : desc->handle; \ + _DTV_vId; \ + }) + +#define VIRTUAL_TO_DESC(virtual_id, null, desc_type) \ + (virtual_id == null) ? NULL : ((desc_type*) virtualToDescriptor( *((int*)(&virtual_id)) )) + +#define VIRTUAL_TO_REAL(id, null, real_id_type, desc_type) \ + ({ \ + desc_type* _VTR_tmp = VIRTUAL_TO_DESC(id, null, desc_type); \ + real_id_type _VTR_id = (_VTR_tmp == NULL) ? id : _VTR_tmp->real_id; \ + _VTR_id; \ + }) + +// FIXME: Looping through every real id is /very/ inefficient for a large +// program, but we don't have any way to loop through only the real ids of a +// particular type yet. That would change with a two-level table approach. +#define ADD_NEW(given_real_id, null, real_id_type, descriptor_type, vid_mask) \ + ({ \ + real_id_type _AD_retval; \ + descriptor_type* _AD_desc; \ + if (given_real_id != null) { \ + bool real_id_exists = false; \ + for (id_desc_pair pair : idDescriptorTable) { \ + if ((vid_mask & pair.first) == vid_mask && ((descriptor_type*)pair.second)->real_id == given_real_id) { \ + real_id_exists = true; \ + _AD_retval = *((real_id_type*)&pair.first); \ + break; \ + } \ + } \ + if (!real_id_exists) { \ + _AD_desc = CONCAT(init_,descriptor_type)(given_real_id); \ + int _AD_vId = nextvId++; \ + _AD_vId = _AD_vId | vid_mask; \ + _AD_desc->handle = _AD_vId; \ + idDescriptorTable[_AD_vId] = ((union id_desc_t*) _AD_desc); \ + _AD_retval = *((real_id_type*)&_AD_vId); \ + } \ + } else { \ + _AD_retval = null; \ + } \ + _AD_retval; \ + }) + +#define REMOVE_OLD(virtual_id, null, descriptor_type, real_type) \ + ({ \ + real_type _RO_retval; \ + if (virtual_id == null) { \ + _RO_retval = null; \ + } else { \ + descriptor_type* _RO_torem; \ + id_desc_iterator it = idDescriptorTable.find( *((int*)&virtual_id) ); \ + if (it != idDescriptorTable.end()) { \ + _RO_torem = ((descriptor_type*)it->second); \ + idDescriptorTable.erase(*((int*)&virtual_id)); \ + _RO_retval = _RO_torem->real_id; \ + CONCAT(destroy_,descriptor_type)(_RO_torem); \ + } else { \ + _RO_retval = null; \ + } \ + } \ + _RO_retval; \ + }) + +// FIXME: this cannot easily have precisely the same semantics as ADD_NEW: the [] +// operator inserts when the argument is not yet present. If the usage in the +// code is indicative of the "update" name despite this, then it's not a +// problem. +// I just checked every usage of UPDATE_MAP to see if those semantics of [] are employed -- +// They are not, with the exception of MPI_COMM_WORLD, so we previrtualize it. +#define UPDATE_MAP(virtual_id, to_update, null, descriptor_type, to_update_type) \ + ({ \ + to_update_type _UM_retval; \ + if (virtual_id == null) { \ + _UM_retval = null; \ + } else { \ + id_desc_iterator _UM_it = idDescriptorTable.find(*((int*)(&virtual_id))); \ + if (_UM_it != idDescriptorTable.end()) { \ + descriptor_type* desc = ((descriptor_type*)_UM_it->second); \ + desc->real_id = to_update; \ + _UM_retval = virtual_id; \ + } else { \ + _UM_retval = null; \ + } \ + } \ + _UM_retval; \ +}) + +#define DESC_TO_VIRTUAL_FILE(desc) \ + DESC_TO_VIRTUAL(desc, MPI_FILE_NULL, MPI_File) +#define VIRTUAL_TO_DESC_FILE(id) \ + VIRTUAL_TO_DESC(id, MPI_FILE_NULL, file_desc_t) #define VIRTUAL_TO_REAL_FILE(id) \ - MpiFileList::instance("MpiFile", MPI_FILE_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_FILE_NULL, MPI_File, file_desc_t) #define ADD_NEW_FILE(id) \ - MpiFileList::instance("MpiFile", MPI_FILE_NULL).onCreate(id) + ADD_NEW(id, MPI_FILE_NULL, MPI_File, file_desc_t, FILE_MASK) #define REMOVE_OLD_FILE(id) \ - MpiFileList::instance("MpiFile", MPI_FILE_NULL).onRemove(id) + REMOVE_OLD(id, MPI_FILE_NULL, file_desc_t, MPI_File) #define UPDATE_FILE_MAP(v, r) \ - MpiFileList::instance("MpiFile", MPI_FILE_NULL).updateMapping(v, r) + UPDATE_MAP(v, r, MPI_FILE_NULL, file_desc_t, MPI_File) -#define REAL_TO_VIRTUAL_COMM(id) \ - MpiCommList::instance("MpiComm", MPI_COMM_NULL).realToVirtual(id) +#define DESC_TO_VIRTUAL_COMM(desc) \ + DESC_TO_VIRTUAL(desc, MPI_COMM_NULL, MPI_Comm) +#define VIRTUAL_TO_DESC_COMM(id) \ + VIRTUAL_TO_DESC(id, MPI_COMM_NULL, comm_desc_t) #define VIRTUAL_TO_REAL_COMM(id) \ - MpiCommList::instance("MpiComm", MPI_COMM_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_COMM_NULL, MPI_Comm, comm_desc_t) #define ADD_NEW_COMM(id) \ - MpiCommList::instance("MpiComm", MPI_COMM_NULL).onCreate(id) + ADD_NEW(id, MPI_COMM_NULL, MPI_Comm, comm_desc_t, COMM_MASK) #define REMOVE_OLD_COMM(id) \ - MpiCommList::instance("MpiComm", MPI_COMM_NULL).onRemove(id) + REMOVE_OLD(id, MPI_COMM_NULL, comm_desc_t, MPI_Comm) #define UPDATE_COMM_MAP(v, r) \ - MpiCommList::instance("MpiComm", MPI_COMM_NULL).updateMapping(v, r) + UPDATE_MAP(v, r, MPI_COMM_NULL, comm_desc_t, MPI_Comm) -#define REAL_TO_VIRTUAL_GROUP(id) \ - MpiGroupList::instance("MpiGroup", MPI_GROUP_NULL).realToVirtual(id) +#define DESC_TO_VIRTUAL_GROUP(desc) \ + DESC_TO_VIRTUAL(desc, MPI_GROUP_NULL, MPI_Group) +#define VIRTUAL_TO_DESC_GROUP(id) \ + VIRTUAL_TO_DESC(id, MPI_GROUP_NULL, group_desc_t) #define VIRTUAL_TO_REAL_GROUP(id) \ - MpiGroupList::instance("MpiGroup", MPI_GROUP_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_GROUP_NULL, MPI_Group, group_desc_t) #define ADD_NEW_GROUP(id) \ - MpiGroupList::instance("MpiGroup", MPI_GROUP_NULL).onCreate(id) + ADD_NEW(id, MPI_GROUP_NULL, MPI_Group, group_desc_t, GROUP_MASK) #define REMOVE_OLD_GROUP(id) \ - MpiGroupList::instance("MpiGroup", MPI_GROUP_NULL).onRemove(id) + REMOVE_OLD(id, MPI_GROUP_NULL, group_desc_t, MPI_Group) #define UPDATE_GROUP_MAP(v, r) \ - MpiGroupList::instance("MpiGroup", MPI_GROUP_NULL).updateMapping(v, r) + UPDATE_MAP(v, r, MPI_GROUP_NULL, group_desc_t, MPI_Group) -#define REAL_TO_VIRTUAL_TYPE(id) \ - MpiTypeList::instance("MpiType", MPI_DATATYPE_NULL).realToVirtual(id) +#define DESC_TO_VIRTUAL_TYPE(desc) \ + DESC_TO_VIRTUAL(desc, MPI_DATATYPE_NULL, MPI_Datatype) +#define VIRTUAL_TO_DESC_TYPE(id) \ + VIRTUAL_TO_DESC(id, MPI_DATATYPE_NULL, datatype_desc_t) #define VIRTUAL_TO_REAL_TYPE(id) \ - MpiTypeList::instance("MpiType", MPI_DATATYPE_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_DATATYPE_NULL, MPI_Datatype, datatype_desc_t) #define ADD_NEW_TYPE(id) \ - MpiTypeList::instance("MpiType", MPI_DATATYPE_NULL).onCreate(id) + ADD_NEW(id, MPI_DATATYPE_NULL, MPI_Datatype, datatype_desc_t, DATATYPE_MASK) #define REMOVE_OLD_TYPE(id) \ - MpiTypeList::instance("MpiType", MPI_DATATYPE_NULL).onRemove(id) + REMOVE_OLD(id, MPI_DATATYPE_NULL, datatype_desc_t, MPI_Datatype) #define UPDATE_TYPE_MAP(v, r) \ - MpiTypeList::instance("MpiType", MPI_DATATYPE_NULL).updateMapping(v, r) + UPDATE_MAP(v, r, MPI_DATATYPE_NULL, datatype_desc_t, MPI_Datatype) -#define REAL_TO_VIRTUAL_OP(id) \ - MpiOpList::instance("MpiOp", MPI_OP_NULL).realToVirtual(id) +#define DESC_TO_VIRTUAL_OP(desc) \ + DESC_TO_VIRTUAL(desc, MPI_OP_NULL, MPI_Op) +#define VIRTUAL_TO_DESC_OP(id) \ + VIRTUAL_TO_DESC(id, MPI_OP_NULL, op_desc_t) #define VIRTUAL_TO_REAL_OP(id) \ - MpiOpList::instance("MpiOp", MPI_OP_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_OP_NULL, MPI_Op, op_desc_t) #define ADD_NEW_OP(id) \ - MpiOpList::instance("MpiOp", MPI_OP_NULL).onCreate(id) + ADD_NEW(id, MPI_OP_NULL, MPI_Op, op_desc_t, OP_MASK) #define REMOVE_OLD_OP(id) \ - MpiOpList::instance("MpiOp", MPI_OP_NULL).onRemove(id) + REMOVE_OLD(id, MPI_OP_NULL, op_desc_t, MPI_Op) #define UPDATE_OP_MAP(v, r) \ - MpiOpList::instance("MpiOp", MPI_OP_NULL).updateMapping(v, r) + UPDATE_MAP(v, r, MPI_OP_NULL, op_desc_t, MPI_Op) -#define REAL_TO_VIRTUAL_COMM_KEYVAL(id) \ - MpiOpList::instance("MpiCommKeyval", 0).realToVirtual(id) +// FIXME: Earlier, I was under the impression that we didn't need to virtualize +// communicator keyvals anymore, but without these, VASP5 DBG will not run. So, +// what is the case? +#define DESC_TO_VIRTUAL_COMM_KEYVAL(desc) \ + DESC_TO_VIRTUAL(desc, 0, int) +#define VIRTUAL_TO_DESC_COMM_KEYVAL(id) \ + VIRTUAL_TO_DESC(id, 0, comm_keyval_desc_t) #define VIRTUAL_TO_REAL_COMM_KEYVAL(id) \ - MpiOpList::instance("MpiCommKeyval", 0).virtualToReal(id) + VIRTUAL_TO_REAL(id, 0, int, comm_keyval_desc_t) #define ADD_NEW_COMM_KEYVAL(id) \ - MpiOpList::instance("MpiCommKeyval", 0).onCreate(id) + ADD_NEW(id, 0, int, comm_keyval_desc_t, COMM_KEYVAL_MASK) #define REMOVE_OLD_COMM_KEYVAL(id) \ - MpiOpList::instance("MpiCommKeyval", 0).onRemove(id) + REMOVE_OLD(id, 0, comm_keyval_desc_t, int) #define UPDATE_COMM_KEYVAL_MAP(v, r) \ - MpiOpList::instance("MpiCommKeyval", 0).updateMapping(v, r) + UPDATE_MAP(v, r, 0, comm_keyval_desc_t, int) -#if 1 -#define REAL_TO_VIRTUAL_REQUEST(id) \ - MpiRequestList::instance("MpiRequest", MPI_REQUEST_NULL).realToVirtual(id) +#define DESC_TO_VIRTUAL_REQUEST(desc) \ + DESC_TO_VIRTUAL(desc, MPI_REQUEST_NULL, MPI_Request) +#define VIRTUAL_TO_DESC_REQUEST(id) \ + VIRTUAL_TO_DESC(id, MPI_REQUEST_NULL, request_desc_t) #define VIRTUAL_TO_REAL_REQUEST(id) \ - MpiRequestList::instance("MpiRequest", MPI_REQUEST_NULL).virtualToReal(id) + VIRTUAL_TO_REAL(id, MPI_REQUEST_NULL, MPI_Request, request_desc_t) #define ADD_NEW_REQUEST(id) \ - MpiRequestList::instance("MpiRequest", MPI_REQUEST_NULL).onCreate(id) + ADD_NEW(id, MPI_REQUEST_NULL, MPI_Request, request_desc_t, REQUEST_MASK) #define REMOVE_OLD_REQUEST(id) \ - MpiRequestList::instance("MpiRequest", MPI_REQUEST_NULL).onRemove(id) + REMOVE_OLD(id, MPI_REQUEST_NULL, request_desc_t, MPI_Request) #define UPDATE_REQUEST_MAP(v, r) \ - MpiRequestList::instance("MpiRequest", MPI_REQUEST_NULL).updateMapping(v, r) -#else -#define VIRTUAL_TO_REAL_REQUEST(id) id -#define ADD_NEW_REQUEST(id) id -#define UPDATE_REQUEST_MAP(v, r) r -#endif + UPDATE_MAP(v, r, MPI_REQUEST_NULL, request_desc_t, MPI_Request) -namespace dmtcp_mpi -{ +struct comm_desc_t { + MPI_Comm real_id; // Real MPI communicator in the lower-half + int handle; // A copy of the int type handle generated from the address of this struct + int size; // Size of this communicator + int local_rank; // local rank number of this communicator + int *ranks; // list of ranks of the group. - template - class MpiVirtualization - { - public: -#ifdef JALIB_ALLOCATOR - static void* operator new(size_t nbytes, void* p) { return p; } - static void* operator new(size_t nbytes) { JALLOC_HELPER_NEW(nbytes); } - static void operator delete(void* p) { JALLOC_HELPER_DELETE(p); } -#endif - static MpiVirtualization& instance(const char *name, T nullId) - { - // FIXME: - // dmtcp_mpi::MpiVirtualization::instance("MpiGroup", 1) - // ._vIdTable.printMaps(true) - // to access _virTableMpiGroup in GDB. - // We need a cleaner way to access it. - if (strcmp(name, "MpiOp") == 0) { - static MpiVirtualization _virTableMpiOp(name, nullId); - return _virTableMpiOp; - } else if (strcmp(name, "MpiComm") == 0) { - static MpiVirtualization _virTableMpiComm(name, nullId); - return _virTableMpiComm; - } else if (strcmp(name, "MpiGroup") == 0) { - static MpiVirtualization _virTableMpiGroup(name, nullId); - return _virTableMpiGroup; - } else if (strcmp(name, "MpiType") == 0) { - static MpiVirtualization _virTableMpiType(name, nullId); - return _virTableMpiType; - } else if (strcmp(name, "MpiCommKeyval") == 0) { - static MpiVirtualization _virTableMpiCommKeyval(name, nullId); - return _virTableMpiCommKeyval; - } else if (strcmp(name, "MpiRequest") == 0) { - static MpiVirtualization _virTableMpiRequest(name, nullId); - return _virTableMpiRequest; - } else if (strcmp(name, "MpiFile") == 0) { - static MpiVirtualization _virTableMpiFile(name, nullId); - return _virTableMpiFile; - } - JWARNING(false)(name)(nullId).Text("Unhandled type"); - static MpiVirtualization _virTableNoSuchObject(name, nullId); - return _virTableNoSuchObject; - } + // struct virt_group_t *group; // Or should this field be a pointer to virt_group_t? +}; - T virtualToReal(T virt) - { - // Don't need to virtualize the null id - if (virt == _nullId) { - return virt; - } - // DMTCP virtual id table already does the lock around the table. - // FIXME: Even with an empty map, we are seeing 1 microsecond overhead. - return _vIdTable.virtualToReal(virt); - } +struct group_desc_t { + MPI_Group real_id; // Real MPI group in the lower-half + int handle; // A copy of the int type handle generated from the address of this struct + int size; // The size of this group in ranks. + int *ranks; // list of ranks of the group. + // unsigned int ggid; // Global Group ID +}; - T realToVirtual(T real) - { - // Don't need to virtualize the null id - if (real == _nullId) { - return real; - } - // DMTCP virtual id table already does the lock around the table. - return _vIdTable.realToVirtual(real); - } +enum mpi_request_kind { + COLLECTIVE, + PEER_TO_PEER +}; - // Adds the given real id to the virtual id table and creates a new - // corresponding virtual id. - // Returns the new virtual id on success, null id otherwise. - T onCreate(T real) - { - T vId = _nullId; - // Don't need to virtualize the null id - if (real == _nullId) { - return vId; - } - // DMTCP virtual id table already does the lock around the table. - if (_vIdTable.realIdExists(real)) { - // Adding a existing real id is a legal operation and - // we should not report warning/error. - // For example, MPI_Comm_group accesses the group associated with - // given communicator. It can be called multiple times from - // different localtions. They should get the same virtual id and - // real id of the same group. - // JWARNING(false)(real)(_vIdTable.getTypeStr()) - // (_vIdTable.realToVirtual(real)) - // .Text("Real id exists. Will overwrite existing mapping"); - vId = _vIdTable.realToVirtual(real); - } else { - if (!_vIdTable.getNewVirtualId(&vId)) { - JWARNING(false)(real)(_vIdTable.getTypeStr()) - .Text("Failed to create a new vId"); - } else { - _vIdTable.updateMapping(vId, real); - } - } - return vId; - } +struct request_desc_t { + MPI_Request real_id; // Real MPI request in the lower-half + int handle; // A copy of the int type handle generated from the address of this struct + mpi_request_kind request_kind; // P2P request or collective request + MPI_Status* status; // Real MPI status in the lower-half +}; - // Removes virtual id from table and returns the real id corresponding - // to the virtual id; if the virtual id does not exist in the table, - // returns null id. - T onRemove(T virt) - { - T realId = _nullId; - // Don't need to virtualize the null id - if (virt == _nullId) { - return realId; - } - // DMTCP virtual id table already does the lock around the table. - if (_vIdTable.virtualIdExists(virt)) { - realId = _vIdTable.virtualToReal(virt); - _vIdTable.erase(virt); - } else { - JWARNING(false)(virt)(_vIdTable.getTypeStr()) - .Text("Cannot delete non-existent virtual id"); - } - return realId; - } +struct op_desc_t { + MPI_Op real_id; // Real MPI operator in the lower-half + int handle; // A copy of the int type handle generated from the address of this struct + MPI_User_function *user_fn; // Function pointer to the user defined op function + int commute; // True if op is commutative. +}; - // Updates the mapping for the given virtual id to the given real id. - // Returns virtual id on success, null-id otherwise - T updateMapping(T virt, T real) - { - // If the virt is the null id, then return it directly. - // Don't need to virtualize the null id - if (virt == _nullId) { - return _nullId; - } - // DMTCP virtual id table already does the lock around the table. - if (!_vIdTable.virtualIdExists(virt)) { - JWARNING(false)(virt)(real)(_vIdTable.getTypeStr()) - (_vIdTable.realToVirtual(real)) - .Text("Cannot update mapping for a non-existent virt. id"); - return _nullId; - } - _vIdTable.updateMapping(virt, real); - return virt; - } +struct datatype_desc_t { + MPI_Datatype real_id; // Real MPI type in the lower-half + int handle; // A copy of the int type handle generated from the address of this struct + // Components of user-defined datatype. + int num_integers; + int *integers; + int num_addresses; + MPI_Aint *addresses; + int num_large_counts; + int *large_counts; + int num_datatypes; + MPI_Datatype *datatypes; // hmmm.. hierarchical restore? + int combiner; + // if is_freed is true, then we should not update the descriptor on a checkpoint. + bool is_freed; +}; - private: - // Pvt. constructor - MpiVirtualization(const char *name, T nullId) - : _vIdTable(name, (T)0, (size_t)999999), - _nullId(nullId) - { - } +struct file_desc_t { + MPI_File real_id; + int handle; +}; + +struct comm_keyval_desc_t { + int real_id; + int handle; +}; + +// FIXME: Some of these structs (request_desc_t, file_desc_t, +// comm_keyval_desc_t) Are just very thin wrappers around a real id, with no +// other information. So, should these be virtualized after all? In an "#else" +// branch of an "#if 1" in the main code, VIRTUAL_TO_REAL_REQUEST(id) is +// defined as just id, for instance. +union id_desc_t { + comm_desc_t comm; + group_desc_t group; + request_desc_t request; + op_desc_t op; + datatype_desc_t datatype; + file_desc_t file; + comm_keyval_desc_t comm_keyval; + + operator comm_desc_t () const { return comm; } + operator group_desc_t () const { return group; } + operator request_desc_t () const { return request; } + operator op_desc_t () const { return op; } + operator datatype_desc_t () const { return datatype; } + operator file_desc_t () const { return file; } + operator comm_keyval_desc_t () const { return comm_keyval; } +}; + +extern std::map idDescriptorTable; +extern int base; +extern int nextvId; +typedef typename std::map::iterator id_desc_iterator; +typedef std::pair id_desc_pair; + +id_desc_t* virtualToDescriptor(int virtId); - // Virtual Ids Table - dmtcp::VirtualIdTable _vIdTable; - // Default "NULL" value for id - T _nullId; - }; // class MpiId +datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType); +op_desc_t* init_op_desc_t(MPI_Op realOp); +request_desc_t* init_request_desc_t(MPI_Request realReq); +group_desc_t* init_group_desc_t(MPI_Group realGroup); +comm_desc_t* init_comm_desc_t(MPI_Comm realComm); +file_desc_t* init_file_desc_t(MPI_File realFile); + +void destroy_datatype_desc_t(datatype_desc_t* datatype); +void destroy_op_desc_t(op_desc_t* op); +void destroy_request_desc_t(request_desc_t* request); +void destroy_group_desc_t(group_desc_t* group); +void destroy_comm_desc_t(comm_desc_t* comm); +void destroy_file_desc_t(file_desc_t* file); + +void init_comm_world(); + +namespace dmtcp_mpi +{ // FIXME: The new name should be: GlobalIdOfSimiliarComm class VirtualGlobalCommId { @@ -394,4 +447,6 @@ namespace dmtcp_mpi }; }; // namespace dmtcp_mpi + + #endif // ifndef MPI_VIRTUAL_IDS_H From 027450409384a1a06bc9def62ab86292dec48771 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 19:00:18 -0400 Subject: [PATCH 02/49] integrate ggid machinery into vid machinery --- .../mpi-wrappers/mpi_cart_wrappers.cpp | 6 +- .../mpi-wrappers/mpi_collective_wrappers.cpp | 4 +- .../mpi-wrappers/mpi_comm_wrappers.cpp | 17 +-- mpi-proxy-split/seq_num.cpp | 83 +++++------ mpi-proxy-split/seq_num.h | 6 +- mpi-proxy-split/virtual-ids.cpp | 108 ++++++++++++-- mpi-proxy-split/virtual-ids.h | 132 +++--------------- 7 files changed, 170 insertions(+), 186 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp index 082c7ea22..2fd4af4e3 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp @@ -129,7 +129,7 @@ USER_DEFINED_WRAPPER(int, Cart_sub, (MPI_Comm) comm, int ndims = 0; MPI_Cartdim_get(comm, &ndims); MPI_Comm virtComm = ADD_NEW_COMM(*new_comm); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *new_comm = virtComm; active_comms.insert(virtComm); FncArg rs = CREATE_LOG_BUF(remain_dims, ndims * sizeof(int)); @@ -202,7 +202,7 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm)old_comm, (int)ndims, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*comm_cart); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *comm_cart = virtComm; active_comms.insert(virtComm); @@ -251,7 +251,7 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm) old_comm, (int) ndims, RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*comm_cart); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *comm_cart = virtComm; active_comms.insert(virtComm); FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index eb1026946..e215bfabb 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -633,7 +633,7 @@ USER_DEFINED_WRAPPER(int, Comm_split, (MPI_Comm) comm, (int) color, (int) key, RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); LOG_CALL(restoreComms, Comm_split, comm, color, key, *newcomm); @@ -655,7 +655,7 @@ USER_DEFINED_WRAPPER(int, Comm_dup, (MPI_Comm) comm, (MPI_Comm *) newcomm) RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); LOG_CALL(restoreComms, Comm_dup, comm, *newcomm); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index 9ae2a36d7..0718e5df2 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -128,16 +128,9 @@ USER_DEFINED_WRAPPER(int, Comm_create, (MPI_Comm) comm, (MPI_Group) group, RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); - unsigned int gid = VirtualGlobalCommId::instance() - .createGlobalId(virtComm); + grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - std::map::iterator it = - seq_num.find(gid); - if (it == seq_num.end()) { - seq_num[gid] = 0; - target[gid] = 0; - } LOG_CALL(restoreComms, Comm_create, comm, group, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -213,10 +206,6 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) // realComm = REMOVE_OLD_COMM(*comm); // CLEAR_COMM_LOGS(*comm); active_comms.erase(*comm); - unsigned int gid = VirtualGlobalCommId::instance().getGlobalId(*comm); -#if 0 - seq_num.erase(gid); -#endif LOG_CALL(restoreComms, Comm_free, *comm); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -355,7 +344,7 @@ USER_DEFINED_WRAPPER(int, Comm_split_type, (MPI_Comm) comm, (int) split_type, RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); LOG_CALL(restoreComms, Comm_split_type, comm, @@ -486,7 +475,7 @@ USER_DEFINED_WRAPPER(int, Comm_create_group, (MPI_Comm) comm, int retval = MPI_Comm_create_group_internal(comm, group, tag, newcomm); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); - VirtualGlobalCommId::instance().createGlobalId(virtComm); + grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); LOG_CALL(restoreComms, Comm_create_group, comm, group, tag, virtComm); diff --git a/mpi-proxy-split/seq_num.cpp b/mpi-proxy-split/seq_num.cpp index e3cb13acd..8eb55a027 100644 --- a/mpi-proxy-split/seq_num.cpp +++ b/mpi-proxy-split/seq_num.cpp @@ -53,10 +53,6 @@ sem_t ckpt_thread_sem; sem_t freepass_sem; sem_t freepass_sync_sem; -std::map seq_num; -std::map target; -typedef std::pair comm_seq_pair_t; - constexpr const char *comm_seq_max_db = "/plugin/MANA/comm-seq-max"; void seq_num_init() { @@ -82,30 +78,32 @@ void seq_num_destroy() { } int print_seq_nums() { - unsigned int comm_id; - unsigned long seq; + unsigned int comm_ggid; + unsigned long seq_num; int target_reached = 1; - for (comm_seq_pair_t pair : seq_num) { - comm_id = pair.first; - seq = pair.second; - printf("%d, %u, %lu\n", g_world_rank, comm_id, seq); + for (ggid_desc_pair pair : ggidDescriptorTable) { + comm_ggid = pair.first; + seq_num = pair.second->seq_num; + printf("%d, %u, %lu\n", g_world_rank, comm_ggid, seq_num); } fflush(stdout); return target_reached; } int check_seq_nums(bool exclusive) { - unsigned int comm_id; + unsigned int comm_ggid; + ggid_desc_t* ggid_desc; int target_reached = 1; - for (comm_seq_pair_t pair : seq_num) { - comm_id = pair.first; + for (ggid_desc_pair pair : ggidDescriptorTable) { + comm_ggid = pair.first; + ggid_desc = pair.second; if (exclusive) { - if (target[comm_id] + 1 > seq_num[comm_id]) { + if (ggid_desc->target_num + 1 > ggid_desc->seq_num) { target_reached = 0; break; } } else { - if (target[comm_id] > seq_num[comm_id]) { + if (ggid_desc->target_num > ggid_desc->seq_num) { target_reached = 0; break; } @@ -127,8 +125,9 @@ int twoPhaseCommit(MPI_Comm comm, } void seq_num_broadcast(MPI_Comm comm, unsigned long new_target) { - unsigned int comm_gid = VirtualGlobalCommId::instance().getGlobalId(comm); - unsigned long msg[2] = {comm_gid, new_target}; + ggid_desc_t* comm_ggid_desc = VIRTUAL_TO_DESC_COMM(comm)->ggid_desc; + unsigned int comm_ggid = comm_ggid_desc->ggid; + unsigned long msg[2] = {comm_ggid, new_target}; int comm_size; int comm_rank; int world_rank; @@ -151,7 +150,7 @@ void seq_num_broadcast(MPI_Comm comm, unsigned long new_target) { RETURN_TO_UPPER_HALF(); #ifdef DEBUG_SEQ_NUM printf("rank %d sending to rank %d new target comm %u seq %lu target %lu\n", - g_world_rank, world_rank, comm_gid, seq_num[comm_gid], new_target); + g_world_rank, world_rank, comm_ggid, comm_ggid_desc->seq_num, new_target); fflush(stdout); #endif } @@ -179,14 +178,13 @@ void commit_begin(MPI_Comm comm, bool passthrough) { MPI_STATUS_IGNORE); RETURN_TO_UPPER_HALF(); unsigned int updated_comm = (unsigned int) new_target[0]; + ggid_desc_t* updated_ggid_desc = VIRTUAL_TO_DESC_COMM(updated_comm)->ggid_desc; unsigned long updated_target = new_target[1]; - std::map::iterator it = - target.find(updated_comm); - if (it != target.end() && it->second < updated_target) { - target[updated_comm] = updated_target; + if (updated_ggid_desc != NULL && updated_ggid_desc->target_num < updated_target) { + updated_ggid_desc->target_num = updated_target; #ifdef DEBUG_SEQ_NUM printf("rank %d received new target comm %u seq %lu target %lu\n", - g_world_rank, updated_comm, seq_num[updated_comm], updated_target); + g_world_rank, updated_comm, updated_ggid_desc->seq_num, updated_target); fflush(stdout); #endif } @@ -194,15 +192,15 @@ void commit_begin(MPI_Comm comm, bool passthrough) { } pthread_mutex_lock(&seq_num_lock); current_phase = IN_CS; - unsigned int comm_gid = VirtualGlobalCommId::instance().getGlobalId(comm); - seq_num[comm_gid]++; + ggid_desc_t* comm_ggid_desc = VIRTUAL_TO_DESC_COMM(comm)->ggid_desc; + comm_ggid_desc->seq_num++; pthread_mutex_unlock(&seq_num_lock); #ifdef DEBUG_SEQ_NUM // print_seq_nums(); #endif - if (ckpt_pending && seq_num[comm_gid] > target[comm_gid]) { - target[comm_gid] = seq_num[comm_gid]; - seq_num_broadcast(comm, seq_num[comm_gid]); + if (ckpt_pending && comm_ggid_desc->seq_num > comm_ggid_desc->target_num) { + comm_ggid_desc->target_num = comm_ggid_desc->seq_num; + seq_num_broadcast(comm, comm_ggid_desc->seq_num); } } @@ -228,13 +226,14 @@ void commit_finish(MPI_Comm comm, bool passthrough) { RETURN_TO_UPPER_HALF(); unsigned int updated_comm = (unsigned int) new_target[0]; unsigned long updated_target = new_target[1]; - std::map::iterator it = - target.find(updated_comm); - if (it != target.end() && it->second < updated_target) { - target[updated_comm] = updated_target; + ggid_desc_t* updated_ggid_desc = VIRTUAL_TO_DESC_COMM(updated_comm)->ggid_desc; + // std::map::iterator it = + // target.find(updated_comm); + if (updated_ggid_desc != NULL && updated_ggid_desc->target_num < updated_target) { + updated_ggid_desc->target_num = updated_target; #ifdef DEBUG_SEQ_NUM printf("rank %d received new target comm %u seq %lu target %lu\n", - g_world_rank, updated_comm, seq_num[updated_comm], updated_target); + g_world_rank, updated_comm, updated_ggid_desc->seq_num, updated_target); fflush(stdout); #endif } @@ -243,30 +242,32 @@ void commit_finish(MPI_Comm comm, bool passthrough) { } void upload_seq_num() { - for (comm_seq_pair_t pair : seq_num) { + for (ggid_desc_pair pair : ggidDescriptorTable) { dmtcp::string comm_id_str(jalib::XToString(pair.first)); - unsigned int seq = pair.second; + unsigned int seq = pair.second->seq_num; JASSERT(dmtcp::kvdb::request64(KVDBRequest::MAX, comm_seq_max_db, comm_id_str, seq) == KVDBResponse::SUCCESS); } } -void download_targets(std::map &target) { +void download_targets(std::map &ggidDescriptorTable) { int64_t max_seq = 0; unsigned int comm_id; - for (comm_seq_pair_t pair : seq_num) { + ggid_desc_t* ggid_desc; + for (ggid_desc_pair pair : ggidDescriptorTable) { comm_id = pair.first; + ggid_desc = pair.second; dmtcp::string comm_id_str(jalib::XToString(pair.first)); JASSERT(dmtcp::kvdb::get64(comm_seq_max_db, comm_id_str, &max_seq) == KVDBResponse::SUCCESS); - target[comm_id] = max_seq; + ggid_desc->target_num = max_seq; } } -void share_seq_nums(std::map &target) { +void share_seq_nums(std::map &ggidDescriptorTable) { upload_seq_num(); dmtcp_global_barrier("mana/share-seq-num"); - download_targets(target); + download_targets(ggidDescriptorTable); } void drain_mpi_collective() { @@ -275,7 +276,7 @@ void drain_mpi_collective() { int64_t in_cs = 0; pthread_mutex_lock(&seq_num_lock); ckpt_pending = true; - share_seq_nums(target); + share_seq_nums(ggidDescriptorTable); pthread_mutex_unlock(&seq_num_lock); while (1) { char key[32] = {0}; diff --git a/mpi-proxy-split/seq_num.h b/mpi-proxy-split/seq_num.h index 745b227d5..0cb4c9003 100644 --- a/mpi-proxy-split/seq_num.h +++ b/mpi-proxy-split/seq_num.h @@ -4,6 +4,7 @@ #include #include #include +#include "virtual-ids.h" typedef enum _reset_type_t { RESUME, @@ -44,16 +45,13 @@ typedef struct __rank_state_t // Global communicator for MANA internal use extern MPI_Comm g_world_comm; -extern std::map seq_num; -extern std::map target; - // The main functions of the sequence number algorithm for MPI collectives void commit_begin(MPI_Comm comm, bool passthrough); void commit_finish(MPI_Comm comm, bool passthrough); int twoPhaseCommit(MPI_Comm comm, std::functiondoRealCollectiveComm); void drain_mpi_collective(); -void share_seq_nums(std::map &target); +void share_seq_nums(std::map &ggidDescriptorTable); int check_seq_nums(bool exclusive); int print_seq_nums(); void seq_num_init(); diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index 6e5acad8a..2761ef5cc 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -44,6 +44,7 @@ // reinterpretation will fit into an int64 pointer. This should be // fine if no real MPI Type is smaller than an int32. typedef typename std::map::iterator id_desc_iterator; +typedef typename std::map::iterator ggid_desc_iterator; // Per Yao Xu, MANA does not require the thread safety offered by // DMTCP's VirtualIdTable. We use std::map. @@ -51,40 +52,117 @@ typedef typename std::map::iterator id_desc_iterator; // int vId -> id_desc_t*, which contains rId. std::map idDescriptorTable; -// dead-simple vid generation mechanism. +// int ggid -> ggid_desc_t*, which contains CVC information. +std::map ggidDescriptorTable; + +// dead-simple vid generation mechanism, add one to get new ids. int base = 1; int nextvId = base; +// Hash function on integers. Consult https://stackoverflow.com/questions/664014/. +// Returns a hash. +int hash(int i) { + return i * 2654435761 % ((unsigned long)1 << 32); +} + +// Compute the ggid [Global Group Identifier] of a real MPI communicator. +// This consists of a hash of its integer ranks. +// OUT: rbuf +// Returns ggid. +int getggid(MPI_Comm comm, int worldRank, int commSize, int* rbuf) { + if (comm == MPI_COMM_NULL || comm == MPI_COMM_WORLD) { + return comm; + } + unsigned int ggid = 0; + + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Allgather)(&worldRank, 1, MPI_INT, + rbuf, 1, MPI_INT, comm); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); + + for (int i = 0; i < commSize; i++) { + ggid ^= hash(rbuf[i] + 1); + } + + return ggid; +} + // This is a descriptor initializer. Its job is to write an initial // descriptor for a real MPI Communicator. comm_desc_t* init_comm_desc_t(MPI_Comm realComm) { comm_desc_t* desc = ((comm_desc_t*)malloc(sizeof(comm_desc_t))); + desc->real_id = realComm; - desc->ranks = NULL; + desc->global_ranks = NULL; + return desc; } +void grant_ggid(MPI_Comm virtualComm) { + int worldRank, commSize, localRank; + + comm_desc_t* desc = ((comm_desc_t*)virtualToDescriptor(*((int*)&virtualComm))); + + MPI_Comm realComm = desc->real_id; + + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Comm_rank)(MPI_COMM_WORLD, &worldRank); + NEXT_FUNC(Comm_size)(realComm, &commSize); + NEXT_FUNC(Comm_rank)(realComm, &localRank); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); + + int* ranks = ((int* )malloc(sizeof(int) * commSize)); + + int ggid = getggid(realComm, worldRank, commSize, ranks); + ggid_desc_iterator it = ggidDescriptorTable.find(ggid); + + if (it != ggidDescriptorTable.end()) { + // There exists another communicator with the same ggid, i.e., + // this communicator is an alias. We use the same ggid_desc. + desc->ggid_desc = it->second; + } else { + ggid_desc_t* gd = ((ggid_desc_t *) malloc(sizeof(ggid_desc_t))); + gd->ggid = ggid; + gd->target_num = 0; + gd->seq_num = 0; + ggidDescriptorTable[ggid] = gd; + desc->ggid_desc = gd; + } + + desc->global_ranks = ranks; + desc->local_rank = localRank; + desc->size = commSize; + + return; +} + +// HACK See notes in virtual-ids.h about the reference counting. void destroy_comm_desc_t(comm_desc_t* desc) { - free(desc->ranks); + free(desc->global_ranks); + // We DO NOT free the ggid_desc. Need to think about how to do it correctly. free(desc); } group_desc_t* init_group_desc_t(MPI_Group realGroup) { group_desc_t* desc = ((group_desc_t*)malloc(sizeof(group_desc_t))); desc->real_id = realGroup; - desc->ranks = NULL; + desc->global_ranks = NULL; desc->size = 0; return desc; } void destroy_group_desc_t(group_desc_t* group) { - free(group->ranks); + free(group->global_ranks); free(group); } request_desc_t* init_request_desc_t(MPI_Request realReq) { - request_desc_t* desc = ((request_desc_t*)malloc(sizeof(request_desc_t))); // FIXME + request_desc_t* desc = ((request_desc_t*)malloc(sizeof(request_desc_t))); desc->real_id = realReq; return desc; } @@ -114,8 +192,8 @@ datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType) { desc->num_addresses = 0; desc->addresses = NULL; - // TODO this was in Yao's spec sheet, but I haven't seen it used - // in any of the relevant functions. Why is this here? + // FIXME: this was in Yao's spec sheet, but I haven't seen it used in any + // of the relevant functions. Why is this here? desc->num_large_counts = 0; desc->large_counts = NULL; @@ -167,13 +245,25 @@ id_desc_t* virtualToDescriptor(int virtId) { return NULL; } -// We need to preemptively virtualize MPI_COMM_WORLD. +// FIXME: We need to preemptively virtualize MPI_COMM_WORLD, with GGID. In the +// original code, there is also a pre-initialization for MPI_COMM_NULL, but +// that doesn't appear to be necessary for CVC algorithm functionality. void init_comm_world() { comm_desc_t* comm_world = ((comm_desc_t*)malloc(sizeof(comm_desc_t))); + ggid_desc_t* comm_world_ggid = ((ggid_desc_t*)malloc(sizeof(ggid_desc_t))); + comm_world->ggid_desc = comm_world_ggid; + // The upper-half one. + comm_world_ggid->ggid = MPI_COMM_WORLD; + comm_world_ggid->seq_num = 0; + comm_world_ggid->target_num = 0; // FIXME: This WILL NOT WORK when moving to OpenMPI, ExaMPI, as written. // Yao, Twinkle, should apply their lh_constants_map strategy here. comm_world->real_id = 0x84000000; + // FIXME: the other fields are not initialized. This is an INTERNAL + // communicator descriptor, strictly for bookkeeping, not for reconstructing, + // etc. idDescriptorTable[MPI_COMM_WORLD] = ((union id_desc_t*)comm_world); + ggidDescriptorTable[MPI_COMM_WORLD] = comm_world_ggid; } diff --git a/mpi-proxy-split/virtual-ids.h b/mpi-proxy-split/virtual-ids.h index 4c7e39a0d..96477c642 100644 --- a/mpi-proxy-split/virtual-ids.h +++ b/mpi-proxy-split/virtual-ids.h @@ -227,12 +227,22 @@ #define UPDATE_REQUEST_MAP(v, r) \ UPDATE_MAP(v, r, MPI_REQUEST_NULL, request_desc_t, MPI_Request) +struct ggid_desc_t { + int ggid; // hashing results of communicator members + + unsigned long seq_num; + + unsigned long target_num; + +}; + struct comm_desc_t { MPI_Comm real_id; // Real MPI communicator in the lower-half int handle; // A copy of the int type handle generated from the address of this struct int size; // Size of this communicator int local_rank; // local rank number of this communicator - int *ranks; // list of ranks of the group. + int *global_ranks; // list of ranks of the group. + ggid_desc_t* ggid_desc; // struct virt_group_t *group; // Or should this field be a pointer to virt_group_t? }; @@ -241,7 +251,7 @@ struct group_desc_t { MPI_Group real_id; // Real MPI group in the lower-half int handle; // A copy of the int type handle generated from the address of this struct int size; // The size of this group in ranks. - int *ranks; // list of ranks of the group. + int *global_ranks; // list of ranks of the group. // unsigned int ggid; // Global Group ID }; @@ -315,13 +325,19 @@ union id_desc_t { }; extern std::map idDescriptorTable; +extern std::map ggidDescriptorTable; extern int base; extern int nextvId; typedef typename std::map::iterator id_desc_iterator; typedef std::pair id_desc_pair; +typedef typename std::map::iterator ggid_desc_iterator; +typedef std::pair ggid_desc_pair; id_desc_t* virtualToDescriptor(int virtId); +int getggid(MPI_Comm comm); +int hash(int i); + datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType); op_desc_t* init_op_desc_t(MPI_Op realOp); request_desc_t* init_request_desc_t(MPI_Request realReq); @@ -337,116 +353,6 @@ void destroy_comm_desc_t(comm_desc_t* comm); void destroy_file_desc_t(file_desc_t* file); void init_comm_world(); - -namespace dmtcp_mpi -{ - - // FIXME: The new name should be: GlobalIdOfSimiliarComm - class VirtualGlobalCommId { - public: - unsigned int createGlobalId(MPI_Comm comm) { - if (comm == MPI_COMM_NULL) { - return comm; - } - unsigned int gid = 0; - int worldRank, commSize; - int realComm = VIRTUAL_TO_REAL_COMM(comm); - MPI_Comm_rank(MPI_COMM_WORLD, &worldRank); - MPI_Comm_size(comm, &commSize); - int rbuf[commSize]; - // FIXME: Use MPI_Group_translate_ranks instead of Allgather. - // MPI_Group_translate_ranks only executes locally. So we can avoid - // the cost of collective communication - // FIXME: cray cc complains "catastrophic error" that can't find - // split-process.h -#if 1 - DMTCP_PLUGIN_DISABLE_CKPT(); - JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Allgather)(&worldRank, 1, MPI_INT, - rbuf, 1, MPI_INT, realComm); - RETURN_TO_UPPER_HALF(); - DMTCP_PLUGIN_ENABLE_CKPT(); -#else - MPI_Allgather(&worldRank, 1, MPI_INT, rbuf, 1, MPI_INT, comm); -#endif - for (int i = 0; i < commSize; i++) { - gid ^= hash(rbuf[i] + 1); - } - // FIXME: We assume the hash collision between communicators who - // have different members is low. - // FIXME: We want to prune virtual communicators to avoid long - // restart time. - // FIXME: In VASP we observed that for the same virtual communicator - // (adding 1 to each new communicator with the same rank members), - // the virtual group can change over time, using: - // virtual Comm -> real Comm -> real Group -> virtual Group - // We don't understand why since vasp does not seem to free groups. -#if 0 - // FIXME: Some code can create new communicators during execution, - // and so hash conflict may occur later. - // if the new gid already exists in the map, add one and test again - while (1) { - bool found = false; - for (std::pair idPair : globalIdTable) { - if (idPair.second == gid) { - found = true; - break; - } - } - if (found) { - gid++; - } else { - break; - } - } -#endif - globalIdTable[comm] = gid; - return gid; - } - - unsigned int getGlobalId(MPI_Comm comm) { - std::map::iterator it = - globalIdTable.find(comm); - JASSERT(it != globalIdTable.end())(comm) - .Text("Can't find communicator in the global id table"); - return it->second; - } - - static VirtualGlobalCommId& instance() { - static VirtualGlobalCommId _vGlobalId; - return _vGlobalId; - } - - private: - VirtualGlobalCommId() - { - globalIdTable[MPI_COMM_NULL] = MPI_COMM_NULL; - globalIdTable[MPI_COMM_WORLD] = MPI_COMM_WORLD; - } - - void printMap(bool flag = false) { - for (std::pair idPair : globalIdTable) { - if (flag) { - printf("virtual comm: %x, real comm: %x, global id: %x\n", - idPair.first, VIRTUAL_TO_REAL_COMM(idPair.first), - idPair.second); - fflush(stdout); - } else { - JTRACE("Print global id mapping")((void*) (uint64_t) idPair.first) - ((void*) (uint64_t) VIRTUAL_TO_REAL_COMM(idPair.first)) - ((void*) (uint64_t) idPair.second); - } - } - } - // from https://stackoverflow.com/questions/664014/ - // what-integer-hash-function-are-good-that-accepts-an-integer-hash-key - int hash(int i) { - return i * 2654435761 % ((unsigned long)1 << 32); - } - std::map globalIdTable; - }; -}; // namespace dmtcp_mpi - - +void grant_ggid(MPI_Comm virtualComm); #endif // ifndef MPI_VIRTUAL_IDS_H From c6819115208710b9af8f17891c65829cc763504c Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 19:20:28 -0400 Subject: [PATCH 03/49] ggid machinery integration: FIXME commentary --- mpi-proxy-split/virtual-ids.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index 2761ef5cc..4f5640b41 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -128,6 +128,11 @@ void grant_ggid(MPI_Comm virtualComm) { } else { ggid_desc_t* gd = ((ggid_desc_t *) malloc(sizeof(ggid_desc_t))); gd->ggid = ggid; + + // FIXME: In the old system, using VirtualGlobalCommId, These were only + // initiatialized in the wrapper function for Comm_create, (but not + // Comm_split, etc.) + // So, what is correct? gd->target_num = 0; gd->seq_num = 0; ggidDescriptorTable[ggid] = gd; @@ -141,10 +146,12 @@ void grant_ggid(MPI_Comm virtualComm) { return; } -// HACK See notes in virtual-ids.h about the reference counting. void destroy_comm_desc_t(comm_desc_t* desc) { free(desc->global_ranks); - // We DO NOT free the ggid_desc. Need to think about how to do it correctly. + // FIXME: We DO NOT free the ggid_desc, because it is aliased, and as such + // shouldn't always be removed. We could do a form of reference counting, but + // we would need to make sure that it is thread-safe. Need to think about how + // to do it correctly in the MANA architecture. free(desc); } @@ -254,6 +261,7 @@ void init_comm_world() { comm_world->ggid_desc = comm_world_ggid; // The upper-half one. comm_world_ggid->ggid = MPI_COMM_WORLD; + comm_world_ggid->seq_num = 0; comm_world_ggid->target_num = 0; From 13c9de06614e993e2f9e316e802ff34917c9543b Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 20:33:54 -0400 Subject: [PATCH 04/49] fix logic bug: updated_comm -> updated_comm_ggid --- mpi-proxy-split/seq_num.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mpi-proxy-split/seq_num.cpp b/mpi-proxy-split/seq_num.cpp index 8eb55a027..d253be335 100644 --- a/mpi-proxy-split/seq_num.cpp +++ b/mpi-proxy-split/seq_num.cpp @@ -177,14 +177,14 @@ void commit_begin(MPI_Comm comm, bool passthrough) { status.MPI_SOURCE, status.MPI_TAG, real_world_comm, MPI_STATUS_IGNORE); RETURN_TO_UPPER_HALF(); - unsigned int updated_comm = (unsigned int) new_target[0]; - ggid_desc_t* updated_ggid_desc = VIRTUAL_TO_DESC_COMM(updated_comm)->ggid_desc; + unsigned int updated_comm_ggid = (unsigned int) new_target[0]; + ggid_desc_iterator it = ggidDescriptorTable.find(updated_comm_ggid); unsigned long updated_target = new_target[1]; - if (updated_ggid_desc != NULL && updated_ggid_desc->target_num < updated_target) { - updated_ggid_desc->target_num = updated_target; + if (it != ggidDescriptorTable.end() && it->second->target_num < updated_target) { + it->second->target_num = updated_target; #ifdef DEBUG_SEQ_NUM printf("rank %d received new target comm %u seq %lu target %lu\n", - g_world_rank, updated_comm, updated_ggid_desc->seq_num, updated_target); + g_world_rank, updated_comm_ggid, it->second->seq_num, updated_target); fflush(stdout); #endif } @@ -224,16 +224,14 @@ void commit_finish(MPI_Comm comm, bool passthrough) { status.MPI_SOURCE, status.MPI_TAG, real_world_comm, MPI_STATUS_IGNORE); RETURN_TO_UPPER_HALF(); - unsigned int updated_comm = (unsigned int) new_target[0]; + unsigned int updated_comm_ggid = (unsigned int) new_target[0]; unsigned long updated_target = new_target[1]; - ggid_desc_t* updated_ggid_desc = VIRTUAL_TO_DESC_COMM(updated_comm)->ggid_desc; - // std::map::iterator it = - // target.find(updated_comm); - if (updated_ggid_desc != NULL && updated_ggid_desc->target_num < updated_target) { - updated_ggid_desc->target_num = updated_target; + ggid_desc_iterator it = ggidDescriptorTable.find(updated_comm_ggid); + if (it != ggidDescriptorTable.end() && it->second->target_num < updated_target) { + it->second->target_num = updated_target; #ifdef DEBUG_SEQ_NUM printf("rank %d received new target comm %u seq %lu target %lu\n", - g_world_rank, updated_comm, updated_ggid_desc->seq_num, updated_target); + g_world_rank, updated_comm_ggid, it->second->seq_num, updated_target); fflush(stdout); #endif } From 99b29df4e525285f753141be2827cf6a7cf126cf Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 21:54:46 -0400 Subject: [PATCH 05/49] record-replay -> decode-recode --- .../mpi-wrappers/mpi_op_wrappers.cpp | 6 + mpi-proxy-split/mpi_plugin.cpp | 14 +- mpi-proxy-split/virtual-ids.cpp | 324 +++++++++++++++++- mpi-proxy-split/virtual-ids.h | 21 ++ 4 files changed, 361 insertions(+), 4 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index 540ca431c..04a6230e5 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -44,6 +44,12 @@ USER_DEFINED_WRAPPER(int, Op_create, RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Op virtOp = ADD_NEW_OP(*op); + // FIXME HACK: Since MPI does not provide any functions To deserialize an + // operator, we get the data at creation time. + // + // FIXME: Do we also have to reconstruct the MPI_User_function? + op_desc_t* desc = VIRTUAL_TO_DESC_OP(virtOp); + update_op_desc_t(desc, user_fn, commute); *op = virtOp; LOG_CALL(restoreOps, Op_create, user_fn, commute, virtOp); } diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index 0141322e8..560a70b5e 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -1110,6 +1110,11 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data) } case DMTCP_EVENT_PRECHECKPOINT: { + // FIXME: We want update_descriptors to capture all userland descriptors, + // but not any internal descriptors. A function here was creating an + // errant descriptor, but that should be fixed by PR #348. + update_descriptors(); + dmtcp_global_barrier("MPI:recordMpiInitMaps"); recordMpiInitMaps(); recordOpenFds(); dmtcp_local_barrier("MPI:GetLocalLhMmapList"); @@ -1171,9 +1176,12 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data) // record-replay.cpp setCartesianCommunicator(lh_info.getCartesianCommunicatorFptr); #endif - dmtcp_global_barrier("MPI:restoreMpiLogState"); - restoreMpiLogState(); // record-replay.cpp - dmtcp_global_barrier("MPI:record-replay.cpp-void"); + dmtcp_global_barrier("MPI:reconstruct_with_descriptors"); + // restoreMpiLogState(); // record-replay.cpp + reconstruct_with_descriptors(); + // FIXME: I place reconstruct_with_descriptors in the analogous place to + // restoreMpiLogState. + dmtcp_global_barrier("MPI:virtual-ids.cpp-void"); replayMpiP2pOnRestart(); // p2p_log_replay.cpp dmtcp_local_barrier("MPI:p2p_log_replay.cpp-void"); seq_num_reset(RESTART); diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index 4f5640b41..2b6804596 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -59,6 +59,10 @@ std::map ggidDescriptorTable; int base = 1; int nextvId = base; +// Internal /real/ group, used to reconstruct and update. A way to access the +// whole bag of global ranks. +MPI_Group g_world_group; + // Hash function on integers. Consult https://stackoverflow.com/questions/664014/. // Returns a hash. int hash(int i) { @@ -146,6 +150,86 @@ void grant_ggid(MPI_Comm virtualComm) { return; } +// FIXME: In some cases, this could cause a group to be needlessly constructed. +// A more efficient design would link groups and comms, but would introduce +// complexity. +void update_comm_desc_t(comm_desc_t* desc) { + // We need to virtualize MPI_COMM_WORLD, but should not reconstruct it. + if (desc->handle == MPI_COMM_WORLD) { + return; + } + + // Cleanup from a previous CKPT, if one occured. FIXME: Is this the correct + // way? + free(desc->global_ranks); + desc->global_ranks = NULL; + + MPI_Group group; + + // FIXME: Should we free this group? + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Comm_group)(desc->real_id, &group); + RETURN_TO_UPPER_HALF(); + + int groupSize = 0; + + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_size)(group, &groupSize); + RETURN_TO_UPPER_HALF(); + + desc->size = groupSize; + + int* local_ranks = ((int*)malloc(sizeof(int) * groupSize)); + int* global_ranks = ((int*)malloc(sizeof(int) * groupSize)); + for (int i = 0; i < groupSize; i++) { + local_ranks[i] = i; + } + + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_translate_ranks)(group, groupSize, local_ranks, g_world_group, global_ranks); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); + + free(local_ranks); + desc->global_ranks = global_ranks; +} + +void reconstruct_with_comm_desc_t(comm_desc_t* desc) { + if (desc->handle == MPI_COMM_WORLD) { + return; + } +#ifdef DEBUG_VIDS + printf("reconstruct_comm_desc_t comm: %x -> %x\n", desc->handle, desc->real_id); + + fflush(stdout); + printf("reconstruct_with_comm_desc_t comm size: 0x%x\n", desc->size); + + fflush(stdout); + printf("reconstruct_with_comm_desc_t ranks:"); + + fflush(stdout); + for (int i = 0; i < desc->size; i++) { + printf(" %i", desc->ranks[i]); + } + printf("\n"); + + fflush(stdout); + fflush(stdout); +#endif + + MPI_Group group; + + // We recreate the communicator with the reconstructed group and MPI_COMM_WORLD. + + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_incl)(g_world_group, desc->size, desc->global_ranks, &group); + NEXT_FUNC(Comm_create_group)(MPI_COMM_WORLD, group, 0, &desc->real_id); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + void destroy_comm_desc_t(comm_desc_t* desc) { free(desc->global_ranks); // FIXME: We DO NOT free the ggid_desc, because it is aliased, and as such @@ -163,6 +247,42 @@ group_desc_t* init_group_desc_t(MPI_Group realGroup) { return desc; } +void update_group_desc_t(group_desc_t* group) { + // Cleanup from a previous checkpoint. + free(group->global_ranks); + group->global_ranks = NULL; + + int groupSize; + + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_size)(group->real_id, &groupSize); + RETURN_TO_UPPER_HALF(); + + int* local_ranks = ((int*)malloc(sizeof(int) * groupSize)); + int* global_ranks = ((int*)malloc(sizeof(int) * groupSize)); + for (int i = 0; i < groupSize; i++) { + local_ranks[i] = i; + } + + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_translate_ranks)(group->real_id, groupSize, local_ranks, g_world_group, global_ranks); + RETURN_TO_UPPER_HALF(); + + free(local_ranks); + group->global_ranks = global_ranks; + group->size = groupSize; + DMTCP_PLUGIN_ENABLE_CKPT(); +} + +void reconstruct_with_group_desc_t(group_desc_t* group) { + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_incl)(g_world_group, group->size, group->global_ranks, &group->real_id); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + void destroy_group_desc_t(group_desc_t* group) { free(group->global_ranks); free(group); @@ -185,6 +305,15 @@ op_desc_t* init_op_desc_t(MPI_Op realOp) { return desc; } +// This update function is special, for the information we need is +// only available at creation time, and not with MPI calls. We +// manually invoke this function at creation time. Mercifully, there +// is only one way to create a new operator, AFAIK. +void update_op_desc_t(op_desc_t* op, MPI_User_function* user_fn, int commute) { + op->user_fn = user_fn; + op->commute = commute; +} + void destroy_op_desc_t(op_desc_t* op) { free(op); } @@ -208,10 +337,112 @@ datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType) { desc->datatypes = NULL; desc->combiner = 0; - desc->is_freed = false; + + // FIXME: I had a design that would keep freed descriptors, to implement + // doubly-derived reconstruction. But we can probably ignore this for now. + + // desc->is_freed = false; return desc; } +void update_datatype_desc_t(datatype_desc_t* datatype) { + // Free the existing memory, if it is not NULL. (i.e., from an older checkpoint) + free(datatype->integers); + free(datatype->addresses); + free(datatype->datatypes); + datatype->integers = NULL; + datatype->addresses = NULL; + datatype->datatypes = NULL; + + DMTCP_PLUGIN_DISABLE_CKPT(); + + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Type_get_envelope)(datatype->real_id, &datatype->num_integers, &datatype->num_addresses, &datatype->num_datatypes, &datatype->combiner); + RETURN_TO_UPPER_HALF(); + + // FIXME: "If combiner is MPI_COMBINER_NAMED then it is erroneous + // to call MPI_TYPE_GET_CONTENTS. " So, we might want to exit + // early (although this shouldn't happen anyway, as nobody should + // make a virtualization of these types) + + // Use the malloc in the upper-half. + datatype->integers = ((int*)malloc(sizeof(int) * datatype->num_integers)); + datatype->addresses = ((MPI_Aint*)malloc(sizeof(MPI_Aint) * datatype->num_addresses)); + datatype->datatypes = ((MPI_Datatype*)malloc(sizeof(MPI_Datatype) * datatype->num_datatypes)); + + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Type_get_contents)(datatype->real_id, datatype->num_integers, datatype->num_addresses, datatype->num_datatypes, datatype->integers, datatype->addresses, datatype->datatypes); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + +// Reconstruction arguments taken from here: +// https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node90.htm +void reconstruct_with_datatype_desc_t(datatype_desc_t* datatype) { + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + switch (datatype->combiner) { + case MPI_COMBINER_DUP: + NEXT_FUNC(Type_dup)(datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_NAMED: + // if the type is named and predefined, we shouldn't need to do anything. + // "If combiner is MPI_COMBINER_NAMED then it is erroneous to call MPI_TYPE_GET_CONTENTS. " + break; + case MPI_COMBINER_VECTOR: + NEXT_FUNC(Type_vector)(datatype->integers[0], datatype->integers[1], datatype->integers[2], datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_HVECTOR: + NEXT_FUNC(Type_hvector)(datatype->integers[0], datatype->integers[1], datatype->addresses[0], datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_INDEXED: + NEXT_FUNC(Type_indexed)(datatype->integers[0], datatype->integers + 1, datatype->integers + 1 + datatype->integers[0], datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_HINDEXED: + NEXT_FUNC(Type_hindexed)(datatype->integers[0], datatype->integers + 1, datatype->addresses, datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_INDEXED_BLOCK: + NEXT_FUNC(Type_create_indexed_block)(datatype->integers[0], datatype->integers[1], datatype->integers + 2, datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_HINDEXED_BLOCK: + NEXT_FUNC(Type_create_hindexed_block)(datatype->integers[0], datatype->integers[1], datatype->addresses, datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_STRUCT: + NEXT_FUNC(Type_create_struct)(datatype->integers[0], datatype->integers + 1, datatype->addresses, datatype->datatypes, &datatype->real_id); + break; + case MPI_COMBINER_SUBARRAY: + NEXT_FUNC(Type_create_subarray)(datatype->integers[0], datatype->integers + 1, datatype->integers + 1 + datatype->integers[0], datatype->integers + 1 + 2 * datatype->integers[0], datatype->integers[1 + 3 * datatype->integers[0]], datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_DARRAY: + NEXT_FUNC(Type_create_darray)(datatype->integers[0], datatype->integers[1], datatype->integers[2], datatype->integers + 3, datatype->integers + 3 + datatype->integers[2], datatype->integers + 3 + 2 * datatype->integers[2], datatype->integers + 3 + 3 * datatype->integers[2], datatype->integers[3 + 4 * datatype->integers[2]], datatype->datatypes[0], &datatype->real_id); + case MPI_COMBINER_CONTIGUOUS: + NEXT_FUNC(Type_contiguous)(datatype->integers[0], datatype->datatypes[0], &datatype->real_id); + break; + case MPI_COMBINER_F90_REAL: + NEXT_FUNC(Type_create_f90_real)(datatype->integers[0], datatype->integers[1], &datatype->real_id); + break; + case MPI_COMBINER_F90_COMPLEX: + NEXT_FUNC(Type_create_f90_complex)(datatype->integers[0], datatype->integers[1], &datatype->real_id); + break; + case MPI_COMBINER_F90_INTEGER: + NEXT_FUNC(Type_create_f90_integer)(datatype->integers[0], &datatype->real_id); + break; + case MPI_COMBINER_RESIZED: + NEXT_FUNC(Type_create_resized)(datatype->datatypes[0], datatype->addresses[0], datatype->addresses[1], &datatype->real_id); + break; + default: + break; + } + // FIXME: This is needed to make the reconstructed type usable in the + // network. But there is a small potential for a double-commit bug here. Is + // there an mpi function that checks if a type has been committed? + // + // Need to research if double-commit is an issue. + NEXT_FUNC(Type_commit)(&datatype->real_id); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + void destroy_datatype_desc_t(datatype_desc_t* datatype) { free(datatype->integers); free(datatype->addresses); @@ -268,6 +499,7 @@ void init_comm_world() { // FIXME: This WILL NOT WORK when moving to OpenMPI, ExaMPI, as written. // Yao, Twinkle, should apply their lh_constants_map strategy here. comm_world->real_id = 0x84000000; + comm_world->handle = MPI_COMM_WORLD; // FIXME: the other fields are not initialized. This is an INTERNAL // communicator descriptor, strictly for bookkeeping, not for reconstructing, // etc. @@ -275,3 +507,93 @@ void init_comm_world() { idDescriptorTable[MPI_COMM_WORLD] = ((union id_desc_t*)comm_world); ggidDescriptorTable[MPI_COMM_WORLD] = comm_world_ggid; } + +// FIXME: This gets us a copy of the world group that we can use to get all the +// ranks. It's a real id, not a virtual one. Is this okay? +void write_g_world_group() { + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Comm_group)(MPI_COMM_WORLD, &g_world_group); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + +void destroy_g_world_group() { + DMTCP_PLUGIN_DISABLE_CKPT(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + NEXT_FUNC(Group_free)(&g_world_group); + RETURN_TO_UPPER_HALF(); + DMTCP_PLUGIN_ENABLE_CKPT(); +} + + +// For all descriptors, update the respective information. +void update_descriptors() { + write_g_world_group(); + for (id_desc_pair pair : idDescriptorTable) { + switch (pair.first & 0xFF000000) { + case UNDEFINED_MASK: + break; + case COMM_MASK: + update_comm_desc_t((comm_desc_t*)pair.second); + break; + case GROUP_MASK: + update_group_desc_t((group_desc_t*)pair.second); + break; + case REQUEST_MASK: + // update_request_desc_t((request_desc_t*)pair.second); + break; + case OP_MASK: + // update_op_desc_t((op_desc_t*)pair.second); + // HACK: This must be called on Op_create. see update_op_desc + break; + case DATATYPE_MASK: + update_datatype_desc_t((datatype_desc_t*)pair.second); + break; + case FILE_MASK: + // update_file_desc_t((file_desc_t*)pair.second); + break; + case COMM_KEYVAL_MASK: + // update_comm_keyval_desc_t((comm_keyval_desc_t*)pair.second); + break; + default: + break; + } + } + destroy_g_world_group(); +} + +// For all descriptors, set its real ID to the one uniquely described by its fields. +void reconstruct_with_descriptors() { + write_g_world_group(); + for (id_desc_pair pair : idDescriptorTable) { + switch (pair.first & 0xFF000000) { + case UNDEFINED_MASK: + break; + case COMM_MASK: + reconstruct_with_comm_desc_t((comm_desc_t*)pair.second); + break; + case GROUP_MASK: + reconstruct_with_group_desc_t((group_desc_t*)pair.second); + break; + case REQUEST_MASK: + // update_request_desc_t((request_desc_t*)pair.second); + break; + case OP_MASK: + reconstruct_with_op_desc_t((op_desc_t*)pair.second); + break; + case DATATYPE_MASK: + reconstruct_with_datatype_desc_t((datatype_desc_t*)pair.second); + break; + case FILE_MASK: + // update_file_desc_t((file_desc_t*)pair.second); + break; + case COMM_KEYVAL_MASK: + // update_comm_keyval_desc_t((comm_keyval_desc_t*)pair.second); + break; + default: + break; + } + } + destroy_g_world_group(); +} diff --git a/mpi-proxy-split/virtual-ids.h b/mpi-proxy-split/virtual-ids.h index 96477c642..9123f5560 100644 --- a/mpi-proxy-split/virtual-ids.h +++ b/mpi-proxy-split/virtual-ids.h @@ -328,6 +328,7 @@ extern std::map idDescriptorTable; extern std::map ggidDescriptorTable; extern int base; extern int nextvId; +extern MPI_Group g_world_group; typedef typename std::map::iterator id_desc_iterator; typedef std::pair id_desc_pair; typedef typename std::map::iterator ggid_desc_iterator; @@ -355,4 +356,24 @@ void destroy_file_desc_t(file_desc_t* file); void init_comm_world(); void grant_ggid(MPI_Comm virtualComm); +void update_datatype_desc_t(datatype_desc_t* datatype); +void update_op_desc_t(op_desc_t* op, MPI_User_function* user_fn, int commute); +void update_request_desc_t(request_desc_t* request); +void update_group_desc_t(group_desc_t* group); +void update_comm_desc_t(comm_desc_t* comm); +void update_file_desc_t(file_desc_t* file); + +void reconstruct_with_datatype_desc_t(datatype_desc_t* datatype); +void reconstruct_with_op_desc_t(op_desc_t* op); +void reconstruct_with_request_desc_t(request_desc_t* request); +void reconstruct_with_group_desc_t(group_desc_t* group); +void reconstruct_with_comm_desc_t(comm_desc_t* comm); +void reconstruct_with_file_desc_t(file_desc_t* file); + +void update_descriptors(); +void reconstruct_with_descriptors(); + +void destroy_g_world_group(); +void write_g_world_group(); + #endif // ifndef MPI_VIRTUAL_IDS_H From 7ae5022876f8613a5a3444554ce8f394be76faa1 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 31 Aug 2023 22:09:47 -0400 Subject: [PATCH 06/49] actually remove old descriptors when RID is freed --- mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp | 6 ++++-- mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp | 5 +++-- mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp | 3 ++- mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp | 4 +++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index 0718e5df2..b1d97d530 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -203,8 +203,10 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) // we'll need to replay this call to reconstruct any other comms that // might have been created using this comm. // - // realComm = REMOVE_OLD_COMM(*comm); - // CLEAR_COMM_LOGS(*comm); + // + // FIXME: Now, we remove it. O(1) decode-recode changes this. + realComm = REMOVE_OLD_COMM(*comm); + CLEAR_COMM_LOGS(*comm); active_comms.erase(*comm); LOG_CALL(restoreComms, Comm_free, *comm); } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index 2ceb0fcca..66695004d 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -101,8 +101,9 @@ USER_DEFINED_WRAPPER(int, Group_free, (MPI_Group *) group) // to replay this call to reconstruct any comms that might // have been created using this group. // - // realGroup = REMOVE_OLD_GROUP(*group); - // CLEAR_GROUP_LOGS(*group); + // FIXME: See comment in Comm_free wrapper. + realGroup = REMOVE_OLD_GROUP(*group); + CLEAR_GROUP_LOGS(*group); LOG_CALL(restoreGroups, Group_free, *group); } DMTCP_PLUGIN_ENABLE_CKPT(); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index 04a6230e5..e036fad18 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -73,7 +73,8 @@ USER_DEFINED_WRAPPER(int, Op_free, (MPI_Op*) op) // to replay this call to reconstruct any new op that might // have been created using this op. // - // realOp = REMOVE_OLD_OP(*op); + // FIXME: See comment in Comm_free wrapper. + realOp = REMOVE_OLD_OP(*op); LOG_CALL(restoreOps, Op_free, *op); } DMTCP_PLUGIN_ENABLE_CKPT(); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index fcdd8ffe9..b03d735b7 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -57,7 +57,9 @@ USER_DEFINED_WRAPPER(int, Type_free, (MPI_Datatype *) type) // to replay this call to reconstruct any new type that might // have been created using this type. // - // realType = REMOVE_OLD_TYPE(*type); + // FIXME: Now, we remove this type. Otherwise, if we try to decode a type + // that has been freed in the lower half, MPI will be upset. + realType = REMOVE_OLD_TYPE(*type); LOG_CALL(restoreTypes, Type_free, *type); } DMTCP_PLUGIN_ENABLE_CKPT(); From abd7ef76ea6979ee3700391dd9dea04a17b03dbf Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Fri, 1 Sep 2023 15:52:03 -0400 Subject: [PATCH 07/49] eliminate one lookup in seq_num_broadcast --- mpi-proxy-split/seq_num.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/seq_num.cpp b/mpi-proxy-split/seq_num.cpp index d253be335..b66e3e597 100644 --- a/mpi-proxy-split/seq_num.cpp +++ b/mpi-proxy-split/seq_num.cpp @@ -125,7 +125,8 @@ int twoPhaseCommit(MPI_Comm comm, } void seq_num_broadcast(MPI_Comm comm, unsigned long new_target) { - ggid_desc_t* comm_ggid_desc = VIRTUAL_TO_DESC_COMM(comm)->ggid_desc; + comm_desc_t* comm_desc = VIRTUAL_TO_DESC_COMM(comm); + ggid_desc_t* comm_ggid_desc = comm_desc->ggid_desc; unsigned int comm_ggid = comm_ggid_desc->ggid; unsigned long msg[2] = {comm_ggid, new_target}; int comm_size; @@ -134,7 +135,7 @@ void seq_num_broadcast(MPI_Comm comm, unsigned long new_target) { MPI_Comm_size(comm, &comm_size); MPI_Comm_rank(comm, &comm_rank); MPI_Group world_group, local_group; - MPI_Comm real_local_comm = VIRTUAL_TO_REAL_COMM(comm); + MPI_Comm real_local_comm = comm_desc->real_id; MPI_Comm real_world_comm = VIRTUAL_TO_REAL_COMM(g_world_comm); JUMP_TO_LOWER_HALF(lh_info.fsaddr); NEXT_FUNC(Comm_group)(real_world_comm, &world_group); From 39589d88982d6e661da8743e0bdeb50e1c38dbb2 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Fri, 1 Sep 2023 16:29:01 -0400 Subject: [PATCH 08/49] fixup descriptor deletion --- mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp | 2 +- mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp | 2 +- mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp | 2 +- mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index b1d97d530..6be857a33 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -205,7 +205,7 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) // // // FIXME: Now, we remove it. O(1) decode-recode changes this. - realComm = REMOVE_OLD_COMM(*comm); + REMOVE_OLD_COMM(*comm); CLEAR_COMM_LOGS(*comm); active_comms.erase(*comm); LOG_CALL(restoreComms, Comm_free, *comm); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index 66695004d..e9c7e1508 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -102,7 +102,7 @@ USER_DEFINED_WRAPPER(int, Group_free, (MPI_Group *) group) // have been created using this group. // // FIXME: See comment in Comm_free wrapper. - realGroup = REMOVE_OLD_GROUP(*group); + REMOVE_OLD_GROUP(*group); CLEAR_GROUP_LOGS(*group); LOG_CALL(restoreGroups, Group_free, *group); } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index e036fad18..35b84eaac 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -74,7 +74,7 @@ USER_DEFINED_WRAPPER(int, Op_free, (MPI_Op*) op) // have been created using this op. // // FIXME: See comment in Comm_free wrapper. - realOp = REMOVE_OLD_OP(*op); + REMOVE_OLD_OP(*op); LOG_CALL(restoreOps, Op_free, *op); } DMTCP_PLUGIN_ENABLE_CKPT(); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index b03d735b7..5054f0c55 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -59,7 +59,7 @@ USER_DEFINED_WRAPPER(int, Type_free, (MPI_Datatype *) type) // // FIXME: Now, we remove this type. Otherwise, if we try to decode a type // that has been freed in the lower half, MPI will be upset. - realType = REMOVE_OLD_TYPE(*type); + REMOVE_OLD_TYPE(*type); LOG_CALL(restoreTypes, Type_free, *type); } DMTCP_PLUGIN_ENABLE_CKPT(); From 567dbd143945d2a5c7b7b93b76c81b88b2a605a1 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Fri, 1 Sep 2023 16:57:08 -0400 Subject: [PATCH 09/49] CRIPPLE unit-test Makefile, for now. --- mpi-proxy-split/unit-test/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mpi-proxy-split/unit-test/Makefile b/mpi-proxy-split/unit-test/Makefile index 078b569e8..77dbf4f8f 100644 --- a/mpi-proxy-split/unit-test/Makefile +++ b/mpi-proxy-split/unit-test/Makefile @@ -48,7 +48,8 @@ TEST_LD_FLAGS=${DMTCP_ROOT}/src/libdmtcpinternal.a \ DRAIN_TEST_OBJS = drain-send-recv-test.o ../p2p_drain_send_recv.cpp \ ../p2p_log_replay.cpp ${DMTCP_ROOT}/src/lookup_service.o -default: ${TEST_BINS} +# default: ${TEST_BINS} +default: ; ../record-replay.o ../drain_send_recv_packets.o: @make -C .. @@ -72,8 +73,10 @@ drain-send-recv-test.exe: ${DRAIN_TEST_OBJS} gdb-%: ${TEST_BINS} gdb --args ./record-replay-$*-test.exe -check: ${TEST_BINS} - @for x in $^; do ${MPIRUN} -n 1 ./$$x; done +# check: ${TEST_BINS} + # @for x in $^; do ${MPIRUN} -n 1 ./$$x; done + +check: ; clean: tidy rm -f ${TEST_BINS} ${TEST_OBJS} From 7310355716caa1cbf36f8f0849302727e6406c4a Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 19:52:49 -0400 Subject: [PATCH 10/49] Remove all LOG_CALL --- mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp | 5 ----- mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp | 5 ----- mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp | 7 ------- mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp | 3 --- mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp | 2 -- mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp | 9 --------- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 2 -- mpi-proxy-split/unit-test/record-replay-cart-test.cpp | 2 -- mpi-proxy-split/unit-test/record-replay-comm-test.cpp | 3 --- mpi-proxy-split/unit-test/record-replay-types-test.cpp | 3 --- 10 files changed, 41 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp index 2fd4af4e3..04fe5c749 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp @@ -78,7 +78,6 @@ USER_DEFINED_WRAPPER(int, Cart_map, (MPI_Comm) comm, (int) ndims, if (retval == MPI_SUCCESS && MPI_LOGGING()) { FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); - LOG_CALL(restoreCarts, Cart_map, comm, ndims, ds, ps, newrank); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -108,7 +107,6 @@ USER_DEFINED_WRAPPER(int, Cart_shift, (MPI_Comm) comm, (int) direction, disp, rank_source, rank_dest); RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { - LOG_CALL(restoreCarts, Cart_shift, comm, direction, disp, *rank_source, *rank_dest); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -133,7 +131,6 @@ USER_DEFINED_WRAPPER(int, Cart_sub, (MPI_Comm) comm, *new_comm = virtComm; active_comms.insert(virtComm); FncArg rs = CREATE_LOG_BUF(remain_dims, ndims * sizeof(int)); - LOG_CALL(restoreCarts, Cart_sub, comm, ndims, rs, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -208,7 +205,6 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm)old_comm, (int)ndims, FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); - LOG_CALL(restoreCarts, Cart_create, old_comm, ndims, ds, ps, reorder, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -256,7 +252,6 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm) old_comm, (int) ndims, active_comms.insert(virtComm); FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); - LOG_CALL(restoreCarts, Cart_create, old_comm, ndims, ds, ps, reorder, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index e215bfabb..f6fea5d94 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -124,7 +124,6 @@ USER_DEFINED_WRAPPER(int, Ibcast, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; - LOG_CALL(restoreRequests, Ibcast, buffer, count, datatype, root, comm, *request); #ifdef USE_REQUEST_LOG logRequestInfo(*request, IBCAST_REQUEST); @@ -161,7 +160,6 @@ USER_DEFINED_WRAPPER(int, Ibarrier, (MPI_Comm) comm, (MPI_Request *) request) if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; - LOG_CALL(restoreRequests, Ibarrier, comm, *request); #ifdef USE_REQUEST_LOG logRequestInfo(*request, IBARRIER_REQUEST); #endif @@ -328,7 +326,6 @@ USER_DEFINED_WRAPPER(int, Ireduce, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; - LOG_CALL(restoreRequests, Ireduce, sendbuf, recvbuf, count, datatype, op, root, comm, *request); #ifdef USE_REQUEST_LOG logRequestInfo(*request, IREDUCE_REQUEST); @@ -636,7 +633,6 @@ USER_DEFINED_WRAPPER(int, Comm_split, (MPI_Comm) comm, (int) color, (int) key, grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - LOG_CALL(restoreComms, Comm_split, comm, color, key, *newcomm); } DMTCP_PLUGIN_ENABLE_CKPT(); commit_finish(comm, passthrough); @@ -658,7 +654,6 @@ USER_DEFINED_WRAPPER(int, Comm_dup, (MPI_Comm) comm, (MPI_Comm *) newcomm) grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - LOG_CALL(restoreComms, Comm_dup, comm, *newcomm); } DMTCP_PLUGIN_ENABLE_CKPT(); commit_finish(comm, passthrough); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index 6be857a33..ee63f298a 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -131,7 +131,6 @@ USER_DEFINED_WRAPPER(int, Comm_create, (MPI_Comm) comm, (MPI_Group) group, grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - LOG_CALL(restoreComms, Comm_create, comm, group, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -208,7 +207,6 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) REMOVE_OLD_COMM(*comm); CLEAR_COMM_LOGS(*comm); active_comms.erase(*comm); - LOG_CALL(restoreComms, Comm_free, *comm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -316,7 +314,6 @@ USER_DEFINED_WRAPPER(int, Comm_set_errhandler, retval = NEXT_FUNC(Comm_set_errhandler)(realComm, errhandler); RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { - LOG_CALL(restoreComms, Comm_set_errhandler, comm, errhandler); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -349,7 +346,6 @@ USER_DEFINED_WRAPPER(int, Comm_split_type, (MPI_Comm) comm, (int) split_type, grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - LOG_CALL(restoreComms, Comm_split_type, comm, split_type, key, inf, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -385,7 +381,6 @@ USER_DEFINED_WRAPPER(int, Attr_delete, (MPI_Comm) comm, (int) keyval) retval = NEXT_FUNC(Attr_delete)(realComm, realCommKeyval); RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { - LOG_CALL(restoreComms, Attr_delete, comm, keyval); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -404,7 +399,6 @@ USER_DEFINED_WRAPPER(int, Attr_put, (MPI_Comm) comm, retval = NEXT_FUNC(Attr_put)(realComm, realCommKeyval, attribute_val); RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && MPI_LOGGING()) { - LOG_CALL(restoreComms, Attr_put, comm, keyval, attribute_val); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -480,7 +474,6 @@ USER_DEFINED_WRAPPER(int, Comm_create_group, (MPI_Comm) comm, grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - LOG_CALL(restoreComms, Comm_create_group, comm, group, tag, virtComm); } return retval; }; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index e9c7e1508..6b7a22889 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -44,7 +44,6 @@ USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group) if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Group virtGroup = ADD_NEW_GROUP(*group); *group = virtGroup; - LOG_CALL(restoreGroups, Comm_group, comm, *group); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -104,7 +103,6 @@ USER_DEFINED_WRAPPER(int, Group_free, (MPI_Group *) group) // FIXME: See comment in Comm_free wrapper. REMOVE_OLD_GROUP(*group); CLEAR_GROUP_LOGS(*group); - LOG_CALL(restoreGroups, Group_free, *group); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -149,7 +147,6 @@ USER_DEFINED_WRAPPER(int, Group_incl, (MPI_Group) group, (int) n, MPI_Group virtGroup = ADD_NEW_GROUP(*newgroup); *newgroup = virtGroup; FncArg rs = CREATE_LOG_BUF(ranks, n * sizeof(int)); - LOG_CALL(restoreGroups, Group_incl, group, n, rs, *newgroup); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index 35b84eaac..8867676bd 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -51,7 +51,6 @@ USER_DEFINED_WRAPPER(int, Op_create, op_desc_t* desc = VIRTUAL_TO_DESC_OP(virtOp); update_op_desc_t(desc, user_fn, commute); *op = virtOp; - LOG_CALL(restoreOps, Op_create, user_fn, commute, virtOp); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -75,7 +74,6 @@ USER_DEFINED_WRAPPER(int, Op_free, (MPI_Op*) op) // // FIXME: See comment in Comm_free wrapper. REMOVE_OLD_OP(*op); - LOG_CALL(restoreOps, Op_free, *op); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index 5054f0c55..ea94c0fd5 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -60,7 +60,6 @@ USER_DEFINED_WRAPPER(int, Type_free, (MPI_Datatype *) type) // FIXME: Now, we remove this type. Otherwise, if we try to decode a type // that has been freed in the lower half, MPI will be upset. REMOVE_OLD_TYPE(*type); - LOG_CALL(restoreTypes, Type_free, *type); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -78,7 +77,6 @@ USER_DEFINED_WRAPPER(int, Type_commit, (MPI_Datatype *) type) realType = REMOVE_OLD_TYPE(*type); } else { if (MPI_LOGGING()) { - LOG_CALL(restoreTypes, Type_commit, *type); } } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -97,7 +95,6 @@ USER_DEFINED_WRAPPER(int, Type_contiguous, (int) count, (MPI_Datatype) oldtype, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - LOG_CALL(restoreTypes, Type_contiguous, count, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -117,7 +114,6 @@ USER_DEFINED_WRAPPER(int, Type_hvector, (int) count, (int) blocklength, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - LOG_CALL(restoreTypes, Type_hvector, count, blocklength, stride, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -173,7 +169,6 @@ USER_DEFINED_WRAPPER(int, Type_create_struct, (int) count, FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(MPI_Aint)); FncArg ts = CREATE_LOG_BUF(array_of_types, count * sizeof(MPI_Datatype)); - LOG_CALL(restoreTypes, Type_create_struct, count, bs, ds, ts, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -226,7 +221,6 @@ USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(MPI_Aint)); - LOG_CALL(restoreTypes, Type_hindexed, count, bs, ds, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -303,7 +297,6 @@ USER_DEFINED_WRAPPER(int, Type_indexed, (int) count, *newtype = virtType; FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(int)); - LOG_CALL(restoreTypes, Type_indexed, count, bs, ds, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -321,7 +314,6 @@ USER_DEFINED_WRAPPER(int, Type_dup, (MPI_Datatype) oldtype, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - LOG_CALL(restoreTypes, Type_dup, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -339,7 +331,6 @@ USER_DEFINED_WRAPPER(int, Type_create_resized, (MPI_Datatype) oldtype, if (retval == MPI_SUCCESS && MPI_LOGGING()) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - LOG_CALL(restoreTypes, Type_create_resized, oldtype, lb, extent, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index 2061f0c22..db92eb1fa 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -76,7 +76,6 @@ USER_DEFINED_WRAPPER(int, Init, (int *) argc, (char ***) argv) { init_comm_world(); g_world_comm = ADD_NEW_COMM(g_world_comm); - LOG_CALL(restoreComms, Comm_dup, MPI_COMM_WORLD, g_world_comm); initialize_drain_send_recv(); DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -102,7 +101,6 @@ USER_DEFINED_WRAPPER(int, Init_thread, (int *) argc, (char ***) argv, init_comm_world(); g_world_comm = ADD_NEW_COMM(g_world_comm); - LOG_CALL(restoreComms, Comm_dup, MPI_COMM_WORLD, g_world_comm); initialize_drain_send_recv(); DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/unit-test/record-replay-cart-test.cpp b/mpi-proxy-split/unit-test/record-replay-cart-test.cpp index 8502233a2..6844d1d35 100644 --- a/mpi-proxy-split/unit-test/record-replay-cart-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-cart-test.cpp @@ -71,7 +71,6 @@ TEST_F(CartTests, testCartCreate) // Log the call FncArg ds = CREATE_LOG_BUF(_dims, _ndims); FncArg ps = CREATE_LOG_BUF(_periods, _ndims); - EXPECT_TRUE(LOG_CALL(restoreCarts, Cart_create, _comm, _ndims, ds, ps, _reorder, virtComm) != NULL); // Replay the call EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); @@ -97,7 +96,6 @@ TEST_F(CartTests, testCartMap) EXPECT_NE(newrank1, -1); FncArg ds = CREATE_LOG_BUF(_dims, _ndims * sizeof(int)); FncArg ps = CREATE_LOG_BUF(_periods, _ndims * sizeof(int)); - EXPECT_TRUE(LOG_CALL(restoreCarts, Cart_map, _comm, _ndims, ds, ps, newrank1) != NULL); EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); // TODO: Not sure how to test that the mapping is still there diff --git a/mpi-proxy-split/unit-test/record-replay-comm-test.cpp b/mpi-proxy-split/unit-test/record-replay-comm-test.cpp index 768df942a..33c1db4cd 100644 --- a/mpi-proxy-split/unit-test/record-replay-comm-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-comm-test.cpp @@ -48,7 +48,6 @@ TEST_F(CommTests, testCommDup) MPI_Comm oldvirt = _virtComm; EXPECT_EQ(VIRTUAL_TO_REAL_COMM(_comm), MPI_COMM_WORLD); // Log the call - LOG_CALL(restoreComms, Comm_dup, _comm, _virtComm); // Replay the call EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); // Verify state after replay @@ -69,7 +68,6 @@ TEST_F(CommTests, testCommSplit) _virtComm = ADD_NEW_COMM(real1); EXPECT_NE(_virtComm, -1); MPI_Comm oldvirt = _virtComm; - LOG_CALL(restoreComms, Comm_split, _comm, color, key, _virtComm); // Replay the call EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); // Verify state after replay @@ -91,7 +89,6 @@ TEST_F(CommTests, testCommCreate) _virtComm = ADD_NEW_COMM(real1); EXPECT_NE(_virtComm, -1); MPI_Comm oldvirt = _virtComm; - LOG_CALL(restoreComms, Comm_create, _comm, group, _virtComm); // Replay the call EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); // Verify state after replay diff --git a/mpi-proxy-split/unit-test/record-replay-types-test.cpp b/mpi-proxy-split/unit-test/record-replay-types-test.cpp index 6e491f5cd..5c92037c2 100644 --- a/mpi-proxy-split/unit-test/record-replay-types-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-types-test.cpp @@ -53,7 +53,6 @@ TEST_F(TypesTests, testTypeContiguous) MPI_Datatype virtType = ADD_NEW_TYPE(real1); EXPECT_EQ(VIRTUAL_TO_REAL_TYPE(virtType), real1); - EXPECT_TRUE(LOG_CALL(restoreTypes, Type_contiguous, _count, type, virtType) != NULL); EXPECT_EQ(RESTORE_MPI_STATE(), MPI_SUCCESS); @@ -73,12 +72,10 @@ TEST_F(TypesTests, testTypeCommit) EXPECT_NE(real1, MPI_DATATYPE_NULL); MPI_Datatype virtType = ADD_NEW_TYPE(real1); EXPECT_EQ(VIRTUAL_TO_REAL_TYPE(virtType), real1); - EXPECT_TRUE(LOG_CALL(restoreTypes, Type_contiguous, _count, type, virtType) != NULL); // Commit the new datatype EXPECT_EQ(MPI_Type_commit(&real1), MPI_SUCCESS); - EXPECT_TRUE(LOG_CALL(restoreTypes, Type_commit, real1) != NULL); EXPECT_EQ(VIRTUAL_TO_REAL_TYPE(virtType), real1); int size = -1; EXPECT_EQ(MPI_Type_size(real1, &size), MPI_SUCCESS); From 8836656ffbbe3ff5ae3d62bfb48aca2c30d4976d Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 20:28:16 -0400 Subject: [PATCH 11/49] REMOVED record-replay functions --- mpi-proxy-split/Makefile | 2 +- .../mpi-wrappers/mpi_cart_wrappers.cpp | 32 +- .../mpi-wrappers/mpi_collective_wrappers.cpp | 14 +- .../mpi-wrappers/mpi_comm_wrappers.cpp | 18 +- .../mpi-wrappers/mpi_error_wrappers.cpp | 1 - .../mpi-wrappers/mpi_file_wrappers.cpp | 2 - .../mpi-wrappers/mpi_group_wrappers.cpp | 12 +- .../mpi-wrappers/mpi_op_wrappers.cpp | 8 +- .../mpi-wrappers/mpi_p2p_wrappers.cpp | 1 - .../mpi-wrappers/mpi_request_wrappers.cpp | 9 +- .../mpi-wrappers/mpi_type_wrappers.cpp | 38 +- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 3 - mpi-proxy-split/mpi_plugin.cpp | 3 - mpi-proxy-split/record-replay.cpp | 1134 ----------------- mpi-proxy-split/record-replay.h | 949 -------------- mpi-proxy-split/seq_num.cpp | 5 +- mpi-proxy-split/unit-test/Makefile | 6 - .../unit-test/record-replay-cart-test.cpp | 5 - .../unit-test/record-replay-comm-test.cpp | 1 - .../unit-test/record-replay-group-test.cpp | 1 - .../unit-test/record-replay-types-test.cpp | 1 - 21 files changed, 55 insertions(+), 2190 deletions(-) delete mode 100644 mpi-proxy-split/record-replay.cpp delete mode 100644 mpi-proxy-split/record-replay.h diff --git a/mpi-proxy-split/Makefile b/mpi-proxy-split/Makefile index 50d442b4a..31bf85a48 100644 --- a/mpi-proxy-split/Makefile +++ b/mpi-proxy-split/Makefile @@ -30,7 +30,7 @@ WRAPPERS_SRCDIR=mpi-wrappers # As you add new files to your plugin library, add the object file names here. LIBOBJS = mpi_plugin.o p2p_drain_send_recv.o p2p_log_replay.o \ - record-replay.o seq_num.o virtual-ids.o \ + seq_num.o virtual-ids.o \ split_process.o ${LOWER_HALF_SRCDIR}/procmapsutils.o #MANA_COORD_OBJS = mana_coordinator.o diff --git a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp index 04fe5c749..a76396c10 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp @@ -28,7 +28,6 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" #ifdef SINGLE_CART_REORDER #include "two-phase-algo.h" @@ -37,8 +36,6 @@ #endif #include "p2p_drain_send_recv.h" -using namespace dmtcp_mpi; - USER_DEFINED_WRAPPER(int, Cart_coords, (MPI_Comm) comm, (int) rank, (int) maxdims, (int*) coords) { @@ -75,9 +72,9 @@ USER_DEFINED_WRAPPER(int, Cart_map, (MPI_Comm) comm, (int) ndims, // FIXME: Need to virtualize this newrank?? retval = NEXT_FUNC(Cart_map)(realComm, ndims, dims, periods, newrank); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { - FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); - FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { + // FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); + // FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -106,8 +103,7 @@ USER_DEFINED_WRAPPER(int, Cart_shift, (MPI_Comm) comm, (int) direction, retval = NEXT_FUNC(Cart_shift)(realComm, direction, disp, rank_source, rank_dest); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { - disp, *rank_source, *rank_dest); + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -123,14 +119,14 @@ USER_DEFINED_WRAPPER(int, Cart_sub, (MPI_Comm) comm, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Cart_sub)(realComm, remain_dims, new_comm); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { int ndims = 0; MPI_Cartdim_get(comm, &ndims); MPI_Comm virtComm = ADD_NEW_COMM(*new_comm); grant_ggid(virtComm); *new_comm = virtComm; active_comms.insert(virtComm); - FncArg rs = CREATE_LOG_BUF(remain_dims, ndims * sizeof(int)); + // FncArg rs = CREATE_LOG_BUF(remain_dims, ndims * sizeof(int)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -197,15 +193,15 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm)old_comm, (int)ndims, g_cartesian_properties.ndims, g_cartesian_properties.coordinates); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*comm_cart); grant_ggid(virtComm); *comm_cart = virtComm; active_comms.insert(virtComm); - FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); - FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); - virtComm); + // FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); + // FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); + // virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -245,14 +241,14 @@ USER_DEFINED_WRAPPER(int, Cart_create, (MPI_Comm) old_comm, (int) ndims, retval = NEXT_FUNC(Cart_create)(realComm, ndims, dims, periods, reorder, comm_cart); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*comm_cart); grant_ggid(virtComm); *comm_cart = virtComm; active_comms.insert(virtComm); - FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); - FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); - ds, ps, reorder, virtComm); + // FncArg ds = CREATE_LOG_BUF(dims, ndims * sizeof(int)); + // FncArg ps = CREATE_LOG_BUF(periods, ndims * sizeof(int)); + // ds, ps, reorder, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index f6fea5d94..c0fc97fc8 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -27,7 +27,6 @@ #include "protectedfds.h" #include "mpi_plugin.h" -#include "record-replay.h" #include "mpi_nextfunc.h" #include "seq_num.h" #include "virtual-ids.h" @@ -50,7 +49,6 @@ isUsingCollectiveToP2p() { #endif } -using namespace dmtcp_mpi; #ifndef MPI_COLLECTIVE_P2P #ifdef NO_BARRIER_BCAST @@ -121,10 +119,9 @@ USER_DEFINED_WRAPPER(int, Ibcast, retval = NEXT_FUNC(Ibcast)(buffer, count, realType, root, realComm, request); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; - root, comm, *request); #ifdef USE_REQUEST_LOG logRequestInfo(*request, IBCAST_REQUEST); #endif @@ -157,7 +154,7 @@ USER_DEFINED_WRAPPER(int, Ibarrier, (MPI_Comm) comm, (MPI_Request *) request) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Ibarrier)(realComm, request); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; #ifdef USE_REQUEST_LOG @@ -323,10 +320,9 @@ USER_DEFINED_WRAPPER(int, Ireduce, retval = NEXT_FUNC(Ireduce)(sendbuf, recvbuf, count, realType, realOp, root, realComm, request); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Request virtRequest = ADD_NEW_REQUEST(*request); *request = virtRequest; - count, datatype, op, root, comm, *request); #ifdef USE_REQUEST_LOG logRequestInfo(*request, IREDUCE_REQUEST); #endif @@ -628,7 +624,7 @@ USER_DEFINED_WRAPPER(int, Comm_split, (MPI_Comm) comm, (int) color, (int) key, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_split)(realComm, color, key, newcomm); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); grant_ggid(virtComm); *newcomm = virtComm; @@ -649,7 +645,7 @@ USER_DEFINED_WRAPPER(int, Comm_dup, (MPI_Comm) comm, (MPI_Comm *) newcomm) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_dup)(realComm, newcomm); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); grant_ggid(virtComm); *newcomm = virtComm; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index ee63f298a..8671372b2 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -34,12 +34,10 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" #include "seq_num.h" #include "p2p_drain_send_recv.h" -using namespace dmtcp_mpi; // TODO // - validate operation status (right now we assume them to be successful by @@ -126,7 +124,7 @@ USER_DEFINED_WRAPPER(int, Comm_create, (MPI_Comm) comm, (MPI_Group) group, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_create)(realComm, realGroup, newcomm); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); grant_ggid(virtComm); *newcomm = virtComm; @@ -197,7 +195,7 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) } DMTCP_PLUGIN_DISABLE_CKPT(); int retval = MPI_Comm_free_internal(comm); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { // NOTE: We cannot remove the old comm from the map, since // we'll need to replay this call to reconstruct any other comms that // might have been created using this comm. @@ -205,7 +203,6 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) // // FIXME: Now, we remove it. O(1) decode-recode changes this. REMOVE_OLD_COMM(*comm); - CLEAR_COMM_LOGS(*comm); active_comms.erase(*comm); } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -313,7 +310,7 @@ USER_DEFINED_WRAPPER(int, Comm_set_errhandler, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_set_errhandler)(realComm, errhandler); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -341,12 +338,11 @@ USER_DEFINED_WRAPPER(int, Comm_split_type, (MPI_Comm) comm, (int) split_type, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_split_type)(realComm, split_type, key, inf, newcomm); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); grant_ggid(virtComm); *newcomm = virtComm; active_comms.insert(virtComm); - split_type, key, inf, virtComm); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -380,7 +376,7 @@ USER_DEFINED_WRAPPER(int, Attr_delete, (MPI_Comm) comm, (int) keyval) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Attr_delete)(realComm, realCommKeyval); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -398,7 +394,7 @@ USER_DEFINED_WRAPPER(int, Attr_put, (MPI_Comm) comm, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Attr_put)(realComm, realCommKeyval, attribute_val); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -469,7 +465,7 @@ USER_DEFINED_WRAPPER(int, Comm_create_group, (MPI_Comm) comm, { std::function realBarrierCb = [=]() { int retval = MPI_Comm_create_group_internal(comm, group, tag, newcomm); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Comm virtComm = ADD_NEW_COMM(*newcomm); grant_ggid(virtComm); *newcomm = virtComm; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_error_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_error_wrappers.cpp index 6d23cd29b..1b2405e70 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_error_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_error_wrappers.cpp @@ -28,7 +28,6 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" DEFINE_FNC(int, Error_class, (int) errorcode, (int *) errorclass); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp index 7089c2c71..35175db30 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_file_wrappers.cpp @@ -29,11 +29,9 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" #include "seq_num.h" -using namespace dmtcp_mpi; std::unordered_map g_params_map; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp index 6b7a22889..3cf868e73 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_group_wrappers.cpp @@ -28,11 +28,8 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" -using namespace dmtcp_mpi; - USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group) { int retval; @@ -41,7 +38,7 @@ USER_DEFINED_WRAPPER(int, Comm_group, (MPI_Comm) comm, (MPI_Group *) group) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Comm_group)(realComm, group); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Group virtGroup = ADD_NEW_GROUP(*group); *group = virtGroup; } @@ -95,14 +92,13 @@ USER_DEFINED_WRAPPER(int, Group_free, (MPI_Group *) group) { DMTCP_PLUGIN_DISABLE_CKPT(); int retval = MPI_Group_free_internal(group); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { // NOTE: We cannot remove the old group, since we'll need // to replay this call to reconstruct any comms that might // have been created using this group. // // FIXME: See comment in Comm_free wrapper. REMOVE_OLD_GROUP(*group); - CLEAR_GROUP_LOGS(*group); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -143,10 +139,10 @@ USER_DEFINED_WRAPPER(int, Group_incl, (MPI_Group) group, (int) n, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Group_incl)(realGroup, n, ranks, newgroup); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Group virtGroup = ADD_NEW_GROUP(*newgroup); *newgroup = virtGroup; - FncArg rs = CREATE_LOG_BUF(ranks, n * sizeof(int)); + // FncArg rs = CREATE_LOG_BUF(ranks, n * sizeof(int)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index 8867676bd..54581d7b6 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -27,12 +27,8 @@ #include "jfilesystem.h" #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" -using namespace dmtcp_mpi; - - USER_DEFINED_WRAPPER(int, Op_create, (MPI_User_function *) user_fn, (int) commute, (MPI_Op *) op) @@ -42,7 +38,7 @@ USER_DEFINED_WRAPPER(int, Op_create, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Op_create)(user_fn, commute, op); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Op virtOp = ADD_NEW_OP(*op); // FIXME HACK: Since MPI does not provide any functions To deserialize an // operator, we get the data at creation time. @@ -67,7 +63,7 @@ USER_DEFINED_WRAPPER(int, Op_free, (MPI_Op*) op) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Op_free)(&realOp); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { // NOTE: We cannot remove the old op, since we'll need // to replay this call to reconstruct any new op that might // have been created using this op. diff --git a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp index f1a0c5ef8..770c9b4ff 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp @@ -31,7 +31,6 @@ #include "protectedfds.h" #include "mpi_nextfunc.h" #include "virtual-ids.h" -#include "record-replay.h" // To support MANA_P2P_LOG and MANA_P2P_REPLAY: #include "p2p-deterministic.h" diff --git a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp index bb683af51..3c3d3ed25 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp @@ -28,7 +28,6 @@ #include "jfilesystem.h" #include "protectedfds.h" -#include "record-replay.h" #include "p2p_log_replay.h" #include "p2p_drain_send_recv.h" #include "mpi_plugin.h" @@ -109,10 +108,9 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, #endif } LOG_POST_Test(request, statusPtr); - if (retval == MPI_SUCCESS && *flag && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && *flag && mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(*request); REMOVE_OLD_REQUEST(*request); - LOG_REMOVE_REQUEST(*request); // remove from record-replay log *request = MPI_REQUEST_NULL; } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -296,7 +294,7 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, } } - if (MPI_LOGGING()) { + if (mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(local_array_of_requests[i]); REMOVE_OLD_REQUEST(local_array_of_requests[i]); local_array_of_requests[i] = MPI_REQUEST_NULL; @@ -363,10 +361,9 @@ USER_DEFINED_WRAPPER(int, Wait, (MPI_Request*) request, (MPI_Status*) status) if (p2p_deterministic_skip_save_request == 0) { if (flag) LOG_POST_Wait(request, statusPtr); } - if (flag && MPI_LOGGING()) { + if (flag && mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(*request); // Remove from g_nonblocking_calls REMOVE_OLD_REQUEST(*request); // Remove from virtual id - LOG_REMOVE_REQUEST(*request); // Remove from record-replay log *request = MPI_REQUEST_NULL; } DMTCP_PLUGIN_ENABLE_CKPT(); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index ea94c0fd5..782bfe24e 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -27,11 +27,8 @@ #include "jfilesystem.h" #include "protectedfds.h" #include "mpi_nextfunc.h" -#include "record-replay.h" #include "virtual-ids.h" -using namespace dmtcp_mpi; - USER_DEFINED_WRAPPER(int, Type_size, (MPI_Datatype) datatype, (int *) size) { int retval; @@ -52,7 +49,7 @@ USER_DEFINED_WRAPPER(int, Type_free, (MPI_Datatype *) type) JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Type_free)(&realType); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { // NOTE: We cannot remove the old type, since we'll need // to replay this call to reconstruct any new type that might // have been created using this type. @@ -76,7 +73,7 @@ USER_DEFINED_WRAPPER(int, Type_commit, (MPI_Datatype *) type) if (retval != MPI_SUCCESS) { realType = REMOVE_OLD_TYPE(*type); } else { - if (MPI_LOGGING()) { + if (mana_state != RESTART_REPLAY) { } } DMTCP_PLUGIN_ENABLE_CKPT(); @@ -92,7 +89,7 @@ USER_DEFINED_WRAPPER(int, Type_contiguous, (int) count, (MPI_Datatype) oldtype, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Type_contiguous)(count, realType, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; } @@ -111,10 +108,9 @@ USER_DEFINED_WRAPPER(int, Type_hvector, (int) count, (int) blocklength, retval = NEXT_FUNC(Type_hvector)(count, blocklength, stride, realType, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - stride, oldtype, virtType); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -163,12 +159,12 @@ USER_DEFINED_WRAPPER(int, Type_create_struct, (int) count, array_of_displacements, realTypes, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); - FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(MPI_Aint)); - FncArg ts = CREATE_LOG_BUF(array_of_types, count * sizeof(MPI_Datatype)); + // FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); + // FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(MPI_Aint)); + // FncArg ts = CREATE_LOG_BUF(array_of_types, count * sizeof(MPI_Datatype)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -215,12 +211,12 @@ USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, array_of_displacements, realType, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); - FncArg ds = CREATE_LOG_BUF(array_of_displacements, - count * sizeof(MPI_Aint)); + // FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); + // FncArg ds = CREATE_LOG_BUF(array_of_displacements, + // count * sizeof(MPI_Aint)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -292,11 +288,11 @@ USER_DEFINED_WRAPPER(int, Type_indexed, (int) count, array_of_displacements, realType, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; - FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); - FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(int)); + // FncArg bs = CREATE_LOG_BUF(array_of_blocklengths, count * sizeof(int)); + // FncArg ds = CREATE_LOG_BUF(array_of_displacements, count * sizeof(int)); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -311,7 +307,7 @@ USER_DEFINED_WRAPPER(int, Type_dup, (MPI_Datatype) oldtype, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Type_dup)(realType, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; } @@ -328,7 +324,7 @@ USER_DEFINED_WRAPPER(int, Type_create_resized, (MPI_Datatype) oldtype, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Type_create_resized)(realType, lb, extent, newtype); RETURN_TO_UPPER_HALF(); - if (retval == MPI_SUCCESS && MPI_LOGGING()) { + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); *newtype = virtType; } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index db92eb1fa..c3634839a 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -28,14 +28,12 @@ #include "jassert.h" #include "jfilesystem.h" #include "protectedfds.h" -#include "record-replay.h" #include "mpi_nextfunc.h" #include "virtual-ids.h" #include "p2p_drain_send_recv.h" #include "mana_header.h" #include "seq_num.h" -using namespace dmtcp_mpi; #if 0 DEFINE_FNC(int, Init, (int *) argc, (char ***) argv) @@ -177,7 +175,6 @@ USER_DEFINED_WRAPPER(int, Get_address, (const void *) location, // FOR DEBUGGING ONLY: // This defines a call to MPI_MANA_Internal in the lower half, which // is especially useful in debugging restart. It is called -// from mpi-proxy-split/mpi_plugin.cpp, just before doing record-replay. // In mpi-proxy-split/lower-half, redefine MPI_MANA_Internal() // to do whatever is desired. Then do: // rm bin/lh_proxy diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index 560a70b5e..f7863831d 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -46,7 +46,6 @@ #include "split_process.h" #include "p2p_log_replay.h" #include "p2p_drain_send_recv.h" -#include "record-replay.h" #include "seq_num.h" #include "mpi_nextfunc.h" #include "virtual-ids.h" @@ -1173,11 +1172,9 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data) mana_state = RESTART_REPLAY; #ifdef SINGLE_CART_REORDER dmtcp_global_barrier("MPI:setCartesianCommunicator"); - // record-replay.cpp setCartesianCommunicator(lh_info.getCartesianCommunicatorFptr); #endif dmtcp_global_barrier("MPI:reconstruct_with_descriptors"); - // restoreMpiLogState(); // record-replay.cpp reconstruct_with_descriptors(); // FIXME: I place reconstruct_with_descriptors in the analogous place to // restoreMpiLogState. diff --git a/mpi-proxy-split/record-replay.cpp b/mpi-proxy-split/record-replay.cpp deleted file mode 100644 index 5ef79e303..000000000 --- a/mpi-proxy-split/record-replay.cpp +++ /dev/null @@ -1,1134 +0,0 @@ -/**************************************************************************** - * Copyright (C) 2019-2021 by Gene Cooperman, Rohan Garg, Yao Xu * - * gene@ccs.neu.edu, rohgarg@ccs.neu.edu, xu.yao1@northeastern.edu * - * * - * This file is part of DMTCP. * - * * - * DMTCP is free software: you can redistribute it and/or * - * modify it under the terms of the GNU Lesser General Public License as * - * published by the Free Software Foundation, either version 3 of the * - * License, or (at your option) any later version. * - * * - * DMTCP is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU Lesser General Public License for more details. * - * * - * You should have received a copy of the GNU Lesser General Public * - * License in the files COPYING and COPYING.LESSER. If not, see * - * . * - ****************************************************************************/ - -#ifdef SINGLE_CART_REORDER -#include -#include "cartesian.h" -#endif - -#include -#include "jassert.h" -#include "jconvert.h" - -#include "record-replay.h" -#include "virtual-ids.h" -#include "p2p_log_replay.h" - -using namespace dmtcp_mpi; - -static int restoreCommSplit(MpiRecord& rec); -static int restoreCommSplitType(MpiRecord& rec); -static int restoreCommDup(MpiRecord& rec); -static int restoreCommCreate(MpiRecord& rec); -static int restoreCommCreateGroup(MpiRecord& rec); -static int restoreCommErrHandler(MpiRecord& rec); -static int restoreCommFree(MpiRecord& rec); -static int restoreAttrPut(MpiRecord& rec); -static int restoreAttrDelete(MpiRecord& rec); -static int restoreCommCreateKeyval(MpiRecord& rec); -static int restoreCommFreeKeyval(MpiRecord& rec); - -static int restoreCommGroup(MpiRecord& rec); -static int restoreGroupFree(MpiRecord& rec); -static int restoreGroupIncl(MpiRecord& rec); - -static int restoreTypeContiguous(MpiRecord& rec); -static int restoreTypeCommit(MpiRecord& rec); -static int restoreTypeHVector(MpiRecord& rec); -static int restoreTypeIndexed(MpiRecord& rec); -static int restoreTypeHIndexed(MpiRecord& rec); -static int restoreTypeFree(MpiRecord& rec); -static int restoreTypeCreateStruct(MpiRecord& rec); -static int restoreTypeDup(MpiRecord& rec); -static int restoreTypeCreateResized(MpiRecord& rec); - -static int restoreCartCreate(MpiRecord& rec); -static int restoreCartMap(MpiRecord& rec); -static int restoreCartShift(MpiRecord& rec); -static int restoreCartSub(MpiRecord& rec); - -static int restoreOpCreate(MpiRecord& rec); -static int restoreOpFree(MpiRecord& rec); - -static int restoreIbcast(MpiRecord& rec); -static int restoreIreduce(MpiRecord& rec); -static int restoreIbarrier(MpiRecord& rec); - -#ifdef SINGLE_CART_REORDER -void create_cartesian_info_mpi_datatype(MPI_Datatype *cidt); -void load_restart_cartesian_mapping(CartesianProperties *cp, - CartesianInfo *ci, - CartesianInfo restart_mapping[]); -void compare_comm_old_and_cart_cartesian_mapping( - CartesianProperties *cp, - CartesianInfo checkpoint_mapping[], - CartesianInfo restart_mapping[], - int *comm_old_ranks_order, - int *comm_cart_ranks_order); -void create_comm_old_communicator(CartesianProperties *cp, - int *comm_old_ranks_order); -void create_comm_cart_communicator(CartesianProperties *cp, - int *comm_cart_ranks_order); -#endif - -void -restoreMpiLogState() -{ - JASSERT(RESTORE_MPI_STATE() == MPI_SUCCESS) - .Text("Failed to restore MPI state"); -} - -int -dmtcp_mpi::restoreComms(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI communicators"); - switch (rec.getType()) { - case GENERATE_ENUM(Comm_split): - JTRACE("restoreCommSplit"); - rc = restoreCommSplit(rec); - break; - case GENERATE_ENUM(Comm_split_type): - JTRACE("restoreCommSplitType"); - rc = restoreCommSplitType(rec); - break; - case GENERATE_ENUM(Comm_dup): - JTRACE("restoreCommDup"); - rc = restoreCommDup(rec); - break; - case GENERATE_ENUM(Comm_create): - JTRACE("restoreCommCreate"); - rc = restoreCommCreate(rec); - break; - case GENERATE_ENUM(Comm_create_group): - JTRACE("restoreCommCreateGroup"); - rc = restoreCommCreateGroup(rec); - break; - case GENERATE_ENUM(Comm_set_errhandler): - JTRACE("restoreCommErrHandler"); - rc = restoreCommErrHandler(rec); - break; - case GENERATE_ENUM(Comm_free): - JTRACE("restoreCommFree"); - rc = restoreCommFree(rec); - break; - case GENERATE_ENUM(Attr_put): - JTRACE("restoreAtrrPut"); - rc = restoreAttrPut(rec); - break; - case GENERATE_ENUM(Attr_delete): - JTRACE("restoreAtrrDelete"); - rc = restoreAttrDelete(rec); - break; - case GENERATE_ENUM(Comm_create_keyval): - JTRACE("restoreCommCreateKeyval"); - rc = restoreCommCreateKeyval(rec); - break; - case GENERATE_ENUM(Comm_free_keyval): - JTRACE("restoreCommFreeKeyval"); - rc = restoreCommFreeKeyval(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -int -dmtcp_mpi::restoreGroups(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI groups"); - switch (rec.getType()) { - case GENERATE_ENUM(Comm_group): - JTRACE("restoreCommGroup"); - rc = restoreCommGroup(rec); - break; - case GENERATE_ENUM(Group_free): - JTRACE("restoreGroupFree"); - rc = restoreGroupFree(rec); - break; - case GENERATE_ENUM(Group_incl): - JTRACE("restoreGroupIncl"); - rc = restoreGroupIncl(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -int -dmtcp_mpi::restoreTypes(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI derived types"); - switch (rec.getType()) { - case GENERATE_ENUM(Type_contiguous): - JTRACE("restoreTypeContiguous"); - rc = restoreTypeContiguous(rec); - break; - case GENERATE_ENUM(Type_commit): - JTRACE("restoreTypeCommit"); - rc = restoreTypeCommit(rec); - break; - case GENERATE_ENUM(Type_hvector): - JTRACE("restoreTypeHVector"); - rc = restoreTypeHVector(rec); - break; - case GENERATE_ENUM(Type_indexed): - JTRACE("restoreTypeIndexed"); - rc = restoreTypeIndexed(rec); - break; - case GENERATE_ENUM(Type_free): - JTRACE("restoreTypeFree"); - rc = restoreTypeFree(rec); - break; - case GENERATE_ENUM(Type_create_struct): - JTRACE("restoreTypeCreateStruct"); - rc = restoreTypeCreateStruct(rec); - break; - case GENERATE_ENUM(Type_hindexed): - JTRACE("restoreTypeHIndexed"); - rc = restoreTypeHIndexed(rec); - break; - case GENERATE_ENUM(Type_dup): - JTRACE("restoreTypeDup"); - rc = restoreTypeDup(rec); - break; - case GENERATE_ENUM(Type_create_resized): - JTRACE("restoreTypeCreateResized"); - rc = restoreTypeCreateResized(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -int -dmtcp_mpi::restoreCarts(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI cartesian"); - switch (rec.getType()) { - case GENERATE_ENUM(Cart_create): - JTRACE("restoreCartCreate"); - rc = restoreCartCreate(rec); - break; - case GENERATE_ENUM(Cart_map): - JTRACE("restoreCartMap"); - rc = restoreCartMap(rec); - break; - case GENERATE_ENUM(Cart_shift): - JTRACE("restoreCartShift"); - rc = restoreCartShift(rec); - break; - case GENERATE_ENUM(Cart_sub): - JTRACE("restoreCartSub"); - rc = restoreCartSub(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -int -dmtcp_mpi::restoreOps(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI Ops"); - switch (rec.getType()) { - case GENERATE_ENUM(Op_create): - JTRACE("restoreOpCreate"); - rc = restoreOpCreate(rec); - break; - case GENERATE_ENUM(Op_free): - JTRACE("restoreOpFree"); - rc = restoreOpFree(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -int -dmtcp_mpi::restoreRequests(MpiRecord &rec) -{ - int rc = -1; - JTRACE("Restoring MPI Requests"); - switch (rec.getType()) { - case GENERATE_ENUM(Ibarrier): - JTRACE("restoreIbarrier"); - rc = restoreIbarrier(rec); - break; - case GENERATE_ENUM(Ireduce): - JTRACE("restoreIreduce"); - rc = restoreIreduce(rec); - break; - case GENERATE_ENUM(Ibcast): - JTRACE("restoreIbcast"); - rc = restoreIbcast(rec); - break; - default: - JWARNING(false)(rec.getType()).Text("Unknown call"); - break; - } - return rc; -} - -static int -restoreCommSplit(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int color = rec.args(1); - int key = rec.args(2); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Comm_split, rec)(comm, color, key, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm virtComm = rec.args(3); - UPDATE_COMM_MAP(virtComm, newcomm); - } - return retval; -} - -static int -restoreCommSplitType(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int split_type = rec.args(1); - int key = rec.args(2); - MPI_Info inf = rec.args(3); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Comm_split_type, rec)(comm, split_type, key, inf, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm virtComm = rec.args(4); - UPDATE_COMM_MAP(virtComm, newcomm); - } - return retval; -} - -static int -restoreCommDup(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Comm_dup, rec)(comm, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm virtComm = rec.args(1); - UPDATE_COMM_MAP(virtComm, newcomm); - } - return retval; -} - -static int -restoreCommCreate(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - MPI_Group group = rec.args(1); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Comm_create, rec)(comm, group, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm oldcomm = rec.args(2); - UPDATE_COMM_MAP(oldcomm, newcomm); - } - return retval; -} - -static int -restoreCommCreateGroup(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - MPI_Group group = rec.args(1); - int tag = rec.args(2); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Comm_create_group, rec)(comm, group, tag, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm oldcomm = rec.args(3); - UPDATE_COMM_MAP(oldcomm, newcomm); - } - return retval; -} - -static int -restoreCommErrHandler(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - MPI_Errhandler errhandler = rec.args(1); - retval = FNC_CALL(Comm_set_errhandler, rec)(comm, errhandler); - JWARNING(retval == MPI_SUCCESS)(comm).Text("Error restoring MPI errhandler"); - return retval; -} - -static int -restoreCommFree(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - retval = FNC_CALL(Comm_free, rec)(&comm); - JWARNING(retval == MPI_SUCCESS)(comm).Text("Error freeing MPI comm"); - if (retval == MPI_SUCCESS) { - // See mpi_comm_wrappers.cpp:Comm_free - // NOTE: We cannot remove the old comm from the map, since - // we'll need to replay this call to reconstruct any other comms that - // might have been created using this comm. - // - // MPI_Comm oldcomm = REMOVE_OLD_COMM(comm); - } - return retval; -} - -static int -restoreAttrPut(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int key = rec.args(1); - void *val = rec.args(2); - retval = FNC_CALL(Attr_put, rec)(comm, key, val); - JWARNING(retval == MPI_SUCCESS)(comm) - .Text("Error restoring MPI attribute-put"); - return retval; -} - -static int -restoreAttrDelete(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int key = rec.args(1); - retval = FNC_CALL(Attr_delete, rec)(comm, key); - JWARNING(retval == MPI_SUCCESS)(comm).Text("Error deleting MPI attribute"); - return retval; -} - -static int -restoreCommCreateKeyval(MpiRecord& rec) -{ - int retval; - void *cfn_tmp = rec.args(0); - void *dfn_tmp = rec.args(1); - MPI_Comm_copy_attr_function *cfn = (MPI_Comm_copy_attr_function*) cfn_tmp; - MPI_Comm_delete_attr_function *dfn = (MPI_Comm_delete_attr_function*) dfn_tmp; - int newkey = 0; - void *extra_state = rec.args(3); - retval = FNC_CALL(Comm_create_keyval, rec)(cfn, dfn, &newkey, extra_state); - if (retval == MPI_SUCCESS) { - int oldkey = rec.args(2); - UPDATE_COMM_KEYVAL_MAP(oldkey, newkey); - } - return retval; -} - -static int -restoreCommFreeKeyval(MpiRecord& rec) -{ - int retval; - int key = rec.args(0); - retval = FNC_CALL(Comm_free_keyval, rec)(&key); - JWARNING(retval == MPI_SUCCESS)(key).Text("Error deleting MPI Comm Keyval"); - // See mpi_comm_wrappers.cpp:Comm_free_keyval - // We don't remove item from virtual-id tables - return retval; -} - -static int -restoreCommGroup(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - MPI_Group newgroup = MPI_GROUP_NULL; - retval = FNC_CALL(Comm_group, rec)(comm, &newgroup); - JWARNING(retval == MPI_SUCCESS)(comm).Text("Error restoring MPI comm group"); - if (retval == MPI_SUCCESS) { - MPI_Group oldgroup = rec.args(1); - UPDATE_GROUP_MAP(oldgroup, newgroup); - } - return retval; -} - -static int -restoreGroupFree(MpiRecord& rec) -{ - int retval; - MPI_Group group = rec.args(0); - retval = FNC_CALL(Group_free, rec)(&group); - JWARNING(retval == MPI_SUCCESS)(group).Text("Error restoring MPI group free"); - if (retval == MPI_SUCCESS) { - // See mpi_group_wrappers.cpp:Group_free - // NOTE: We cannot remove the old group, since we'll need - // to replay this call to reconstruct any comms that might - // have been created using this group. - // - // REMOVE_OLD_GROUP(group); - } - return retval; -} - - -static int -restoreGroupIncl(MpiRecord& rec) -{ - int retval; - MPI_Group group = rec.args(0); - int n = rec.args(1); - int *ranks = rec.args(2); - MPI_Group newgroup = MPI_GROUP_NULL; - retval = FNC_CALL(Group_incl, rec)(group, n, ranks, &newgroup); - JWARNING(retval == MPI_SUCCESS)(group).Text("Error restoring MPI group incl"); - if (retval == MPI_SUCCESS) { - MPI_Group oldgroup = rec.args(3); - UPDATE_GROUP_MAP(oldgroup, newgroup); - } - return retval; -} - -static int -restoreTypeContiguous(MpiRecord& rec) -{ - int retval; - int count = rec.args(0); - MPI_Datatype oldtype = rec.args(1); - MPI_Datatype newtype; - retval = FNC_CALL(Type_contiguous, rec)(count, oldtype, &newtype); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(2); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -static int -restoreTypeCommit(MpiRecord& rec) -{ - int retval; - MPI_Datatype type = rec.args(0); - retval = FNC_CALL(Type_commit, rec)(&type); - JWARNING(retval == MPI_SUCCESS)(type).Text("Could not commit MPI datatype"); - return retval; -} - -static int -restoreTypeHVector(MpiRecord& rec) -{ - int retval; - int count = rec.args(0); - int blocklength = rec.args(1); - MPI_Aint stride = rec.args(2); - MPI_Datatype oldtype = rec.args(3); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_hvector, rec)(count, blocklength, - stride, oldtype, &newtype); - JWARNING(retval == MPI_SUCCESS)(oldtype) - .Text("Could not restore MPI hvector datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(4); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -static int -restoreTypeHIndexed(MpiRecord& rec) -{ - int retval; - int count = rec.args(0); - int *bs = rec.args(1); - MPI_Aint *ds = rec.args(2); - MPI_Datatype oldtype = rec.args(3); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_hindexed, rec)(count, bs, ds, oldtype, &newtype); - JWARNING(retval == MPI_SUCCESS)(oldtype) - .Text("Could not restore MPI hvector datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(4); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -static int -restoreTypeDup(MpiRecord& rec) -{ - int retval; - MPI_Datatype oldtype = rec.args(0); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_dup, rec)(oldtype, &newtype); - JWARNING(retval == MPI_SUCCESS)(oldtype) - .Text("Could not restore MPI hvector datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(1); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -static int -restoreTypeCreateResized(MpiRecord& rec) -{ - int retval; - MPI_Datatype oldtype = rec.args(0); - MPI_Aint lb = rec.args(1); - MPI_Aint ext = rec.args(2); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_create_resized, rec)(oldtype, lb, ext, &newtype); - JWARNING(retval == MPI_SUCCESS)(oldtype) - .Text("Could not restore MPI hvector datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(3); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -void MpiRecordReplay::printRecords(bool print) -{ - JNOTE("Printing _records"); - for(MpiRecord* record : _records) { - int fnc_idx = record->getType(); - if (print) { - printf("%s\n", MPI_Fnc_strings[fnc_idx]); - } else { - JNOTE("") (MPI_Fnc_strings[fnc_idx]); - } - } -} - -static int -restoreTypeIndexed(MpiRecord& rec) -{ - int retval; - int count = rec.args(0); - int *blocklengths = rec.args(1); - int *displs = rec.args(2); - MPI_Datatype oldtype = rec.args(3); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_indexed, rec)(count, blocklengths, - displs, oldtype, &newtype); - JWARNING(retval == MPI_SUCCESS)(oldtype) - .Text("Could not restore MPI indexed datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(4); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -static int -restoreTypeFree(MpiRecord& rec) -{ - int retval; - MPI_Datatype type = rec.args(0); - retval = FNC_CALL(Type_free, rec)(&type); - JWARNING(retval == MPI_SUCCESS)(type).Text("Could not free MPI datatype"); - if (retval == MPI_SUCCESS) { - // See mpi_type_wrappers.cpp:Type_free - // NOTE: We cannot remove the old type from the map, since - // we'll need to replay this call to reconstruct any other type that - // might have been created using this type. - // - // MPI_Datatype realType = REMOVE_OLD_TYPE(type); - } - return retval; -} - -static int -restoreTypeCreateStruct(MpiRecord& rec) -{ - int retval; - int count = rec.args(0); - int *blocklengths = rec.args(1); - MPI_Aint *displs = rec.args(2); - MPI_Datatype *types = rec.args(3); - MPI_Datatype newtype = MPI_DATATYPE_NULL; - retval = FNC_CALL(Type_create_struct, rec)(count, blocklengths, - displs, types, &newtype); - JWARNING(retval == MPI_SUCCESS)(types) - .Text("Could not restore MPI struct datatype"); - if (retval == MPI_SUCCESS) { - MPI_Datatype virtType = rec.args(4); - UPDATE_TYPE_MAP(virtType, newtype); - } - return retval; -} - -#ifdef SINGLE_CART_REORDER -int -load_cartesian_properties(const char *filename, CartesianProperties *cp) -{ - int fd = open(filename, O_RDONLY); - if (fd == -1) { - return -1; - } - read(fd, &cp->comm_old_size, sizeof(int)); - read(fd, &cp->comm_cart_size, sizeof(int)); - read(fd, &cp->comm_old_rank, sizeof(int)); - read(fd, &cp->comm_cart_rank, sizeof(int)); - read(fd, &cp->reorder, sizeof(int)); - read(fd, &cp->ndims, sizeof(int)); - int array_size = sizeof(int) * cp->ndims; - read(fd, cp->coordinates, array_size); - read(fd, cp->dimensions, array_size); - read(fd, cp->periods, array_size); - close(fd); - return 0; -} - -void -load_checkpoint_cartesian_mapping(CartesianProperties *cp, - CartesianInfo checkpoint_mapping[]) -{ - int ndims = cp->ndims; - int comm_old_size = cp->comm_old_size; - for (int i = 0; i < comm_old_size; i++) { - CartesianProperties cp; - dmtcp::ostringstream o; - o << "./ckpt_rank_" << i << "/cartesian.info"; - if (load_cartesian_properties(o.str().c_str(), &cp) == 0) { - checkpoint_mapping[i].comm_old_rank = cp.comm_old_rank; - checkpoint_mapping[i].comm_cart_rank = cp.comm_cart_rank; - for (int j = 0; j < ndims; j++) { - checkpoint_mapping[i].coordinates[j] = cp.coordinates[j]; - } - } - } -} - -// Prior to checkpoint we will use the normal variable names, and -// after restart we will use the '_prime' suffix with variable names. -MPI_Comm comm_cart; -MPI_Comm *comm_cart_prime; -MPI_Comm comm_old; -MPI_Comm comm_old_prime; - -void -create_cartesian_info_mpi_datatype(MPI_Datatype *cidt) -{ - int retval = -1; - int lengths[3] = { 1, 1, MAX_CART_PROP_SIZE }; - - // Calculate displacements - // In C, by default padding can be inserted between fields. MPI_Get_address - // will allow to get the address of each struct field and calculate the - // corresponding displacement relative to that struct base address. The - // displacements thus calculated will therefore include padding if any. - MPI_Aint base_address; - MPI_Aint displacements[3]; - CartesianInfo dummy_ci; - - JUMP_TO_LOWER_HALF(lh_info.fsaddr); - - retval = NEXT_FUNC(Get_address)(&dummy_ci, &base_address); - retval += NEXT_FUNC(Get_address)(&dummy_ci.comm_old_rank, &displacements[0]); - retval += NEXT_FUNC(Get_address)(&dummy_ci.comm_cart_rank, &displacements[1]); - retval += NEXT_FUNC(Get_address)(&dummy_ci.coordinates[0], &displacements[2]); - - displacements[0] = NEXT_FUNC(Aint_diff)(displacements[0], base_address); - displacements[1] = NEXT_FUNC(Aint_diff)(displacements[1], base_address); - displacements[2] = NEXT_FUNC(Aint_diff)(displacements[2], base_address); - - MPI_Datatype types[3] = { MPI_INT, MPI_INT, MPI_INT }; - - retval = - NEXT_FUNC(Type_create_struct)(3, lengths, displacements, types, cidt); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create MPI datatype for struct."); - - retval = NEXT_FUNC(Type_commit)(cidt); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to commit MPI datatype for struct."); - - RETURN_TO_UPPER_HALF(); -} - -void -load_restart_cartesian_mapping(CartesianProperties *cp, CartesianInfo *ci, - CartesianInfo restart_mapping[]) -{ - int retval = -1; - MPI_Datatype ci_type; - create_cartesian_info_mpi_datatype(&ci_type); - // Root process will collect the cartesian info and all other process will - // send their cartesian info - if (ci->comm_old_rank == 0) { - retval = NEXT_FUNC(Gather)(ci, 1, ci_type, restart_mapping, 1, ci_type, 0, - comm_old_prime); - } else { - retval = - NEXT_FUNC(Gather)(ci, 1, ci_type, NULL, 0, ci_type, 0, comm_old_prime); - } - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to load restart cartesian mapping."); -} - -void -compare_comm_old_and_cart_cartesian_mapping(CartesianProperties *cp, - CartesianInfo checkpoint_mapping[], - CartesianInfo restart_mapping[], - int *comm_old_ranks_order, - int *comm_cart_ranks_order) -{ - for (int i = 0; i < cp->comm_old_size; i++) { - CartesianInfo *checkpoint = &checkpoint_mapping[i]; - // Iterate through each entry in the array and find out - // the rank of the process whose coordinates are equal to - // checkpoint.coordinates - for (int j = 0; j < cp->comm_old_size; j++) { - CartesianInfo *restart = &restart_mapping[j]; - int sum = 0; - for (int k = 0; k < cp->ndims; k++) { - if (checkpoint->coordinates[k] == restart->coordinates[k]) { - sum += 1; - } - } - if (sum == cp->ndims) { - comm_old_ranks_order[i] = checkpoint->comm_old_rank; - comm_cart_ranks_order[i] = checkpoint->comm_cart_rank; - break; - } - } - } -} - -void -create_comm_old_communicator(CartesianProperties *cp, int *comm_old_ranks_order) -{ - int retval = -1; - MPI_Group comm_old_group_prime, comm_old_group; - MPI_Comm_group(comm_old_prime, &comm_old_group_prime); - retval = MPI_Group_incl(comm_old_group_prime, cp->comm_old_size, - comm_old_ranks_order, &comm_old_group); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create group."); - retval = - MPI_Comm_create_group(comm_old_prime, comm_old_group, 121, &comm_old); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create communicator."); -} - -void -create_comm_cart_communicator(CartesianProperties *cp, int *comm_cart_ranks_order) -{ - int retval = -1; - MPI_Group comm_cart_group_prime, comm_cart_group; - MPI_Comm_group(*comm_cart_prime, &comm_cart_group_prime); - - retval = MPI_Group_incl(comm_cart_group_prime, cp->comm_cart_size, - comm_cart_ranks_order, &comm_cart_group); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create group."); - MPI_Comm comm_cart_tmp; - retval = MPI_Comm_create_group(*comm_cart_prime, comm_cart_group, 111, - &comm_cart_tmp); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create communicator."); - retval = MPI_Cart_create(comm_cart_tmp, cp->ndims, cp->dimensions, - cp->periods, cp->reorder, &comm_cart); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to create communicator."); -} - -void -setCartesianCommunicator(void *getCartesianCommunicatorFptr) -{ - typedef void (*getCartesianCommunicatorFptr_t)(MPI_Comm **); - ((getCartesianCommunicatorFptr_t)getCartesianCommunicatorFptr)( - &comm_cart_prime); -} - -static int -restoreCartCreate(MpiRecord &rec) -{ - int retval = -1; - int comm_old_ranks_order[MAX_PROCESSES]; - int comm_cart_ranks_order[MAX_PROCESSES]; - - CartesianInfo ci; - CartesianProperties cp; - CartesianInfo checkpoint_mapping[MAX_PROCESSES]; - CartesianInfo restart_mapping[MAX_PROCESSES]; - - // In current implementation, is MPI_COMM_WORLD - comm_old_prime = MPI_COMM_WORLD; - // Get cartesian info of this process - retval = MPI_Comm_rank(comm_old_prime, &ci.comm_old_rank); - retval = MPI_Comm_rank(*comm_cart_prime, &ci.comm_cart_rank); - // Get cartesian properties of this process - dmtcp::ostringstream o; - o << "./ckpt_rank_" << ci.comm_old_rank << "/cartesian.info"; - retval = load_cartesian_properties(o.str().c_str(), &cp); - JASSERT(retval == 0) - (o.str().c_str()).Text("Failed to load cartesian properties."); - // Get coordinates of this process - retval = MPI_Cart_coords(*comm_cart_prime, ci.comm_cart_rank, cp.ndims, - ci.coordinates); - // Load checkpoint cartesian mapping - load_checkpoint_cartesian_mapping(&cp, checkpoint_mapping); - // Load restart cartesian mapping - load_restart_cartesian_mapping(&cp, &ci, restart_mapping); - retval = MPI_Barrier(MPI_COMM_WORLD); - JASSERT(retval == MPI_SUCCESS).Text("MPI_Barrier(1) failed."); - // The root process will populate and - // arrays - if (ci.comm_old_rank == 0) { - compare_comm_old_and_cart_cartesian_mapping(&cp, checkpoint_mapping, - restart_mapping, - comm_old_ranks_order, - comm_cart_ranks_order); - } - retval = MPI_Barrier(MPI_COMM_WORLD); - JASSERT(retval == MPI_SUCCESS).Text("MPI_Barrier(2) failed."); - retval = NEXT_FUNC(Bcast)(comm_old_ranks_order, cp.comm_old_size, MPI_INT, 0, - comm_old_prime); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to broadcast integer array."); - retval = MPI_Barrier(MPI_COMM_WORLD); - JASSERT(retval == MPI_SUCCESS).Text("MPI_Barrier(3) failed."); - retval = NEXT_FUNC(Bcast)(comm_cart_ranks_order, cp.comm_cart_size, MPI_INT, - 0, *comm_cart_prime); - JASSERT(retval == MPI_SUCCESS) - .Text("Failed to broadcast integer array."); - retval = MPI_Barrier(MPI_COMM_WORLD); - JASSERT(retval == MPI_SUCCESS).Text("MPI_Barrier(4) failed."); - // Create and communicators - create_comm_old_communicator(&cp, comm_old_ranks_order); - create_comm_cart_communicator(&cp, comm_cart_ranks_order); - retval = MPI_Barrier(MPI_COMM_WORLD); - JASSERT(retval == MPI_SUCCESS).Text("MPI_Barrier(5) failed."); - // Update mapping - MPI_Comm virtComm = rec.args(5); - // FIXME: This only works for MPICH but maybe not for other MPI libraries. - UPDATE_COMM_MAP(MPI_COMM_WORLD, comm_old); - UPDATE_COMM_MAP(virtComm, comm_cart); - return MPI_SUCCESS; -} - -#else - -static int -restoreCartCreate(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int ndims = rec.args(1); - int *dims = rec.args(2); - int *periods = rec.args(3); - int reorder = rec.args(4); - MPI_Comm newcomm = MPI_COMM_NULL; - retval = FNC_CALL(Cart_create, rec)(comm, ndims, dims, - periods, reorder, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm virtComm = rec.args(5); - UPDATE_COMM_MAP(virtComm, newcomm); - } - return retval; -} - -#endif - -static int -restoreCartMap(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int ndims = rec.args(1); - int *dims = rec.args(2); - int *periods = rec.args(3); - int newrank = -1; - retval = FNC_CALL(Cart_map, rec)(comm, ndims, dims, periods, &newrank); - if (retval == MPI_SUCCESS) { - // FIXME: Virtualize rank? - int oldrank = rec.args(4); - JASSERT(newrank == oldrank)(oldrank)(newrank).Text("Different ranks"); - } - return retval; -} - -static int -restoreCartShift(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - int direction = rec.args(1); - int disp = rec.args(2); - int rank_source = -1; - int rank_dest = -1; - retval = FNC_CALL(Cart_shift, rec)(comm, direction, - disp, &rank_source, &rank_dest); - if (retval == MPI_SUCCESS) { - // FIXME: Virtualize rank? - int oldsrc = rec.args(3); - int olddest = rec.args(4); - JASSERT(oldsrc == rank_source && olddest == rank_dest) - (oldsrc)(olddest)(rank_source)(rank_dest).Text("Different ranks"); - } - return retval; -} - -static int -restoreCartSub(MpiRecord& rec) -{ - int retval; - MPI_Comm comm = rec.args(0); - // int ndims = rec.args(1); - int *remain_dims = rec.args(2); - MPI_Comm newcomm = MPI_COMM_NULL; - // LOG_CALL(restoreCarts, Cart_sub, comm, ndims, rs, virtComm); - retval = FNC_CALL(Cart_sub, rec)(comm, remain_dims, &newcomm); - if (retval == MPI_SUCCESS) { - MPI_Comm virtComm = rec.args(3); - UPDATE_COMM_MAP(virtComm, newcomm); - } - return retval; -} - -static int -restoreOpCreate(MpiRecord& rec) -{ - int retval = -1; - MPI_User_function *user_fn = rec.args(0); - int commute = rec.args(1); - MPI_Op newop = MPI_OP_NULL; - retval = FNC_CALL(Op_create, rec)(user_fn, commute, &newop); - if (retval == MPI_SUCCESS) { - MPI_Op oldop = rec.args(2); - UPDATE_OP_MAP(oldop, newop); - } - return retval; -} - -static int -restoreOpFree(MpiRecord& rec) -{ - int retval = -1; - MPI_Op op = rec.args(0); - MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); - retval = FNC_CALL(Op_free, rec)(&realOp); - if (retval == MPI_SUCCESS) { - // See mpi_op_wrappers.cpp:Op_free - // NOTE: We cannot remove the old op from the map, since - // we'll need to replay this call to reconstruct any other op that - // might have been created using this op. - // - // realOp = REMOVE_OLD_OP(op); - } - return retval; -} - -static int restoreIbcast(MpiRecord& rec) { - int retval = -1; - void *buf = rec.args(0); - int count = rec.args(1); - MPI_Datatype datatype = rec.args(2); - int root = rec.args(3); - MPI_Comm comm = rec.args(4); - if (rec.getComplete()) { - if ((buf = rec.getBuf()) == NULL) { - int size; - MPI_Type_size(datatype, &size); - buf = malloc(count * size); - rec.setBuf(buf); - } - } - MPI_Request newRealRequest = MPI_REQUEST_NULL; - retval = FNC_CALL(Ibcast, rec)(buf, count, datatype, root, comm, - &newRealRequest); - if (retval == MPI_SUCCESS) { - MPI_Request virtRequest = rec.args(5); - UPDATE_REQUEST_MAP(virtRequest, newRealRequest); -#ifdef USE_REQUEST_LOG - logRequestInfo(virtRequest, IBCAST_REQUEST); -#endif - } - return retval; -} - -static int restoreIreduce(MpiRecord& rec) { - int retval = -1; - void *sendbuf = rec.args(0); - void *recvbuf = rec.args(1); - int count = rec.args(2); - MPI_Datatype datatype = rec.args(3); - MPI_Op op = rec.args(4); - int root = rec.args(5); - MPI_Comm comm = rec.args(6); - if (rec.getComplete() == true) { - int rank; - MPI_Comm_rank(comm, &rank); - if (rank == root) { // receiver - sendbuf = MPI_IN_PLACE; - // Use a temporary buffer to consume the received message - if ((recvbuf = rec.getBuf()) == NULL) { - int size; - MPI_Type_size(datatype, &size); - recvbuf = malloc(count * size); - rec.setBuf(recvbuf); - } - } else { // sender - // Sender's buffer contains the data of the recorded message's sendbuf - sendbuf = rec.getBuf(); - } - } - MPI_Request newRealRequest = MPI_REQUEST_NULL; - retval = FNC_CALL(Ireduce, rec)(sendbuf, recvbuf, count, - datatype, op, root, comm, &newRealRequest); - if (retval == MPI_SUCCESS) { - MPI_Request virtRequest = rec.args(7); - UPDATE_REQUEST_MAP(virtRequest, newRealRequest); -#ifdef USE_REQUEST_LOG - logRequestInfo(virtRequest, IREDUCE_REQUEST); -#endif - } - return retval; -} - -static int restoreIbarrier(MpiRecord& rec) { - int retval = -1; - MPI_Comm comm = rec.args(0); - MPI_Request newRealRequest = MPI_REQUEST_NULL; - retval = FNC_CALL(Ibarrier, rec)(comm, &newRealRequest); - MPI_Request virtRequest; - if (retval == MPI_SUCCESS) { - virtRequest = rec.args(1); - UPDATE_REQUEST_MAP(virtRequest, newRealRequest); -#ifdef USE_REQUEST_LOG - logRequestInfo(virtRequest, IBARRIER_REQUEST); -#endif - } - // Verify the request is valid - int flag; - retval = MPI_Request_get_status(virtRequest, &flag, MPI_STATUS_IGNORE); - JASSERT(retval == MPI_SUCCESS); - return retval; -} diff --git a/mpi-proxy-split/record-replay.h b/mpi-proxy-split/record-replay.h deleted file mode 100644 index 56e14ed9a..000000000 --- a/mpi-proxy-split/record-replay.h +++ /dev/null @@ -1,949 +0,0 @@ -/**************************************************************************** - * Copyright (C) 2019-2021 by Gene Cooperman, Rohan Garg, Yao Xu * - * gene@ccs.neu.edu, rohgarg@ccs.neu.edu, xu.yao1@northeastern.edu * - * * - * This file is part of DMTCP. * - * * - * DMTCP is free software: you can redistribute it and/or * - * modify it under the terms of the GNU Lesser General Public License as * - * published by the Free Software Foundation, either version 3 of the * - * License, or (at your option) any later version. * - * * - * DMTCP is distributed in the hope that it will be useful, * - * but WITHOUT ANY WARRANTY; without even the implied warranty of * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * - * GNU Lesser General Public License for more details. * - * * - * You should have received a copy of the GNU Lesser General Public * - * License in the files COPYING and COPYING.LESSER. If not, see * - * . * - ****************************************************************************/ - -#ifndef MPI_RECORD_REPLAY_H -#define MPI_RECORD_REPLAY_H - -#include - -#include -#include -#include -#include -#include - -#include "jassert.h" -#include "jconvert.h" - -#include "lower_half_api.h" - -// Logs the MPI call to the global MPI calls log object (defined by the -// 'MpiRecordReplay' class). 'cb' specifies the callback that will be used -// to replay the MPI call while restoring the MPI state at restart time. 'fnc' -// represents the current MPI call. 'args ...' can be used to provide a -// variable-length list of arguments to be saved. The saved arguments are useful -// while replaying the call later. -#define LOG_CALL(cb, fnc, args...) \ - dmtcp_mpi::MpiRecordReplay::instance().record(cb, GENERATE_ENUM(fnc), \ - GENERATE_FNC_PTR(fnc), args) - -#define RESTORE_MPI_STATE() \ - dmtcp_mpi::MpiRecordReplay::instance().replay() - -#define CLEAR_LOG() \ - dmtcp_mpi::MpiRecordReplay::instance().reset() - -#define CLEAR_GROUP_LOGS(group) \ - dmtcp_mpi::MpiRecordReplay::instance().clearGroupLogs(group) - -#define CLEAR_COMM_LOGS(comm) \ - dmtcp_mpi::MpiRecordReplay::instance().clearCommLogs(comm) - -#define LOG_REMOVE_REQUEST(request) \ - dmtcp_mpi::MpiRecordReplay::instance().removeRequestLog(request) - -// Returns true if we are currently replaying the MPI calls from the saved MPI -// calls log; false, otherwise. Normally, this would be true while restoring -// the MPI state at restart time. All other times, this would return false. -// We cannot use LOGGING since it's used for enabling JTRACE -#define MPI_LOGGING() \ - dmtcp_mpi::MpiRecordReplay::instance().isReplayOn() - -// Calls the wrapper function corresponding to the given type 'type'. (The -// 'rec' object contains a pointer to the wrapper function.) -#define FNC_CALL(type, rec) \ - ({ \ - __typeof__(GENERATE_FNC_PTR(type))_real_MPI_## type = \ - rec.call(GENERATE_FNC_PTR(type)); \ - _real_MPI_ ## type; \ - }) - -#define CREATE_LOG_BUF(buf, len) dmtcp_mpi::FncArg(buf, len) - - -namespace dmtcp_mpi -{ - struct FncArg; - class MpiRecord; - class MpiRecordReplay; - - using mutex_t = std::mutex; - using lock_t = std::unique_lock; - using fcb_t = std::function; - using mpi_record_vector_iterator_t = dmtcp::vector::iterator; - - enum TYPE { - TYPE_INT, - TYPE_INT_PTR, - TYPE_INT_ARRAY, - TYPE_VOID_PTR, - TYPE_VOID_CONST_PTR, - TYPE_LONG, - TYPE_MPI_USER_FNC, - }; - - // Restores the MPI requests and returns MPI_SUCCESS on success - extern int restoreRequests(MpiRecord&); - - // Struct for saving arbitrary function arguments - struct FncArg - { - void *_data; - enum TYPE _type; - - FncArg(const void *data, size_t len, dmtcp_mpi::TYPE type) - : _data(JALLOC_HELPER_MALLOC(len)) - { - _type = type; - if (_data && data) { - memcpy(_data, data, len); - } - } - - // This constructor is only used by CREATE_LOG_BUF - FncArg(const void *data, size_t len) - : _data(JALLOC_HELPER_MALLOC(len)) - { - // Default _type set to TYPE_INT_ARRAY because this constructor is used - // by CREATE_LOG_BUF in MPI_Cart functions. - _type = dmtcp_mpi::TYPE_INT_ARRAY; - if (_data && data) { - memcpy(_data, data, len); - } - } - - ~FncArg() - { - if (!_data) { - JALLOC_HELPER_FREE(_data); - } - } - - // On restart, we will use the restore family of functions in - // record-replay.cpp. A typical line of code there is: - // int arg = rec.args(n); - // rec.args() returns a 'const FncArg&'. - // The copy constructor for arg will invokes one of these casts - // from FncArg to the type for arg. So in this example, the code - // is invoking the 'int cast operator', resulting in: - // int arg = *(int*)(((FncArg)rec.args(n))._data); - - // Returns an int corresponding to the saved argument - operator int() const - { - return *(int*)_data; - } - - // Returns an int pointer to saved argument - // This also handles types like MPI_Datatype* in MPICH, - // because MPI_Datatype is an integer in this implementation. - // This doesn't handle MPI_Aint* because MPI_Aint - // is 8 bytes (long int). - operator int*() const - { - if (_type == dmtcp_mpi::TYPE_INT_ARRAY) { - return (int*)_data; - } else if (_type == dmtcp_mpi::TYPE_INT_PTR) { - return *(int**)_data; - } else { - JASSERT(false).Text("Unsupported arg type"); - return NULL; - } - } - - // This is used to handle MPI_Aint*, because in MPICH - // MPI_Aint is 8 bytes (long int). - operator long*() const - { - return (long*)_data; - } - - // Returns a void pointer to saved argument - operator void*() const - { - return *(void**)_data; - } - - // Returns a void pointer to saved argument - operator void const*() const - { - return *(void const**)_data; - } - - operator MPI_User_function*() const - { - return *(MPI_User_function**)_data; - } - }; - - template - FncArg FncArgTyped(T data) - { - int a; - int *b; - void *c; - void const *d; - MPI_User_function *e; - long f; - - if (typeid(data) == typeid(a)) { - return FncArg(&data, sizeof(data), TYPE_INT); - } - if (typeid(data) == typeid(b)) { - return FncArg(&data, sizeof(data), TYPE_INT_PTR); - } - if (typeid(data) == typeid(c)) { - return FncArg(&data, sizeof(data), TYPE_VOID_PTR); - } - if (typeid(data) == typeid(d)) { - return FncArg(&data, sizeof(data), TYPE_VOID_CONST_PTR); - } - if (typeid(data) == typeid(e)) { - return FncArg(&data, sizeof(data), TYPE_MPI_USER_FNC); - } - if (typeid(data) == typeid(f)) { - return FncArg(&data, sizeof(data), TYPE_MPI_USER_FNC); - } - JASSERT(false).Text("Unkown type for FncArg"); - return FncArgTyped(-1); - } - - // Represent a single call record - class MpiRecord - { - public: -#ifdef JALIB_ALLOCATOR - static void* operator new(size_t nbytes, void* p) { return p; } - static void* operator new(size_t nbytes) { JALLOC_HELPER_NEW(nbytes); } - static void operator delete(void* p) { JALLOC_HELPER_DELETE(p); } -#endif - MpiRecord(fcb_t cb, MPI_Fncs type, void *fnc) - : _cb(cb), _type(type), _fnc(fnc), _buffer(NULL), _complete(false) - { - } - - ~MpiRecord() - { - _args.clear(); - free(_buffer); - } - - // Base case to stop the recursion - void addArgs() - { - } - - // Handle one complex argument - void addArgs(const FncArg arg) - { - _args.push_back(arg); - } - - // Handle one MPI_User_function argument - void addArgs(MPI_User_function *arg) - { - _args.push_back(FncArgTyped((void*)arg)); - } - - // Handle one simple argument - template - void addArgs(const T arg) - { - _args.push_back(FncArgTyped(arg)); - } - - // Handle list of arguments - template - void addArgs(const T car, const Targs... cdr) - { - addArgs(car); - addArgs(cdr...); - } - - // Execute the restore callback for this record - int play() const - { - return _cb(const_cast(*this)); - } - - // Returns a reference to the 'n'-th function argument object - const FncArg& args(int n) const - { - return _args[n]; - } - - // Returns the enum MPI_Fncs type of the current MPI record object - MPI_Fncs getType() const - { - return _type; - } - - void setBuf(void *buf) - { - _buffer = buf; - } - - void *getBuf() - { - return _buffer; - } - - void setComplete(bool complete) - { - _complete = complete; - } - - bool getComplete() - { - return _complete; - } - - // Returns a pointer to the wrapper function corresponding to this MPI - // record object - template - T call(T fptr) const - { - return T(_fnc); - } - - private: - fcb_t _cb; // Callback to invoke to replay this MPI call - MPI_Fncs _type; // enum MPI_Fncs type of this MPI call - void *_fnc; // Pointer to the wrapper function of this MPI call - dmtcp::vector _args; // List of argument objects for this MPI call - void *_buffer; // opaque data saved in the buffer - bool _complete; - }; - - // Singleton class representing the entire log of MPI calls, useful for - // saving and restoring the MPI state - class MpiRecordReplay - { - public: -#ifdef JALIB_ALLOCATOR - static void* operator new(size_t nbytes, void* p) { return p; } - static void* operator new(size_t nbytes) { JALLOC_HELPER_NEW(nbytes); } - static void operator delete(void* p) { JALLOC_HELPER_DELETE(p); } -#endif - - // Returns the singleton instance of the MpiRecordReplay class - static MpiRecordReplay& instance() - { - static MpiRecordReplay _records; - return _records; - } - - // Record/replay addresses two subtle issues in replaying Ireduce/Ibcast. - // [1] Checkpoint replays the Ibcast saved in the object log. The - // receiver is receiving the current MPI Ibcast call. The sender advances - // to the next MPI Ibcast call. Since only the buffer address is saved in - // the log, the sender's buffer value is different in the MPI next call. - // That causes the receiver gets wrong buffer. - // [2] The completed requests is not replayed in the replay. The Ibcast - // sender request is completed while the receiver is still not finished. - // In replay, the receiver's request is replay and waits for the broadcast - // message. However, sender's request is completed so it won't be replayed - // and won't send message. That causes the receiver waiting forever. - // The record-replay workflows is below. - // [1] All requests saved in the record-replay are replayed. - // [2] Saves Ibcast buffer value in addition to its address. - // [3] For the completed sender's request, the saved buffer value is copied - // to temporary buffer before it is replay. - // [4] For the completed receiver's request, a temporary buffer is created - // to consume the broadcast message. Since the request is completed, we - // don't want to impact the original buffer. - // [5] For the requests that is not completed yet, the original buffer address - // is used in replay for both the sender and the receiver. - // - // Records an MPI call with its arguments in the MPI calls log. Returns - // a pointer to the inserted MPI record object (containing the details - // of the call). - template - MpiRecord* record(fcb_t cb, MPI_Fncs type, - const T fPtr, const Targs... args) - { - MpiRecord *rec = new MpiRecord(cb, type, (void*)fPtr); - if (rec) { - rec->addArgs(args...); - // All collective calls are translated in MANA to the async calls Ibarrier/Ireduce/Ibcast. - // If MANA uses other async calls, we need other cases. - switch (type) { - case GENERATE_ENUM(Ibarrier): - { - MPI_Request req = rec->args(1); - if (req != MPI_REQUEST_NULL) - _recordsMap[req] = 0; - break; - } - case GENERATE_ENUM(Ireduce): - { - MPI_Request req = rec->args(7); - MPI_Comm comm = rec->args(6); - int rank; - MPI_Comm_rank(comm, &rank); - int root = rec->args(5); - // Save the sender's buffer value - // We don't need to save the receiver's buffer value - if (rank != root) { - void *sendbuf = rec->args(0); - int count = rec->args(2); - MPI_Datatype datatype = rec->args(3); - int size; - MPI_Type_size(datatype, &size); - void *newbuf = malloc(count * size); - memcpy(newbuf, sendbuf, count * size); - rec->setBuf(newbuf); - } - if (req != MPI_REQUEST_NULL) - _recordsMap[req] = 0; - break; - } - case GENERATE_ENUM(Ibcast): - { - MPI_Request req = rec->args(5); - MPI_Comm comm = rec->args(4); - int rank; - MPI_Comm_rank(comm, &rank); - int root = rec->args(3); - if (rank == root) { - void *buf = rec->args(0); - int count = rec->args(1); - MPI_Datatype type = rec->args(2); - int size; - MPI_Type_size(type, &size); - void *newbuf = malloc(count * size); - memcpy(newbuf, buf, count * size); - rec->setBuf(newbuf); - } - if (req != MPI_REQUEST_NULL) - _recordsMap[req] = 0; - break; - } - case GENERATE_ENUM(Type_hvector): - { - MPI_Datatype newtype = rec->args(4); - MPI_Datatype oldtype = rec->args(3); - datatype_create(newtype); - datatype_incRef(1, &oldtype); - break; - } - case GENERATE_ENUM(Type_create_struct): - { - MPI_Datatype newtype = rec->args(4); - int count = rec->args(0); - MPI_Datatype *oldtypes = rec->args(3); - datatype_create(newtype); - datatype_incRef(count, oldtypes); - break; - } - case GENERATE_ENUM(Type_indexed): - { - MPI_Datatype newtype = rec->args(4); - MPI_Datatype oldtype = rec->args(3); - datatype_create(newtype); - datatype_incRef(1, &oldtype); - break; - } - case GENERATE_ENUM(Type_commit): - // No need to increase ref count so Type_free can - // free the MPI_Type_ records that creates the new type - break; - case GENERATE_ENUM(Type_free): - { - MPI_Datatype type = rec->args(0); - datatype_free(type); - delete rec; - return NULL; - } - default: - // The 'default' cases include record types like - // comm_create, comm_group, group_incl, etc. - // Those known types only need to be recorded. So they don't - // have any case label to pre-process their record info - // before they are recorded. - break; - } - { - lock_t lock(_mutex); - _records.push_back(rec); - } - } - return rec; - } - - // Replays the MPI calls from the log. Returns MPI_SUCCESS on success. - int replay() - { - int rc = MPI_SUCCESS; - lock_t lock(_mutex); - _replayOn = true; - for (MpiRecord* rec : _records) { - MPI_Request req = MPI_REQUEST_NULL; - switch (rec->getType()) { - case GENERATE_ENUM(Ibarrier): - req = rec->args(1); - break; - case GENERATE_ENUM(Ireduce): - req = rec->args(7); - break; - case GENERATE_ENUM(Ibcast): - req = rec->args(5); - break; - default: - break; - } - if (req != MPI_REQUEST_NULL) { - auto iter = _recordsMap.find(req); - if (iter->second == 1) - rec->setComplete(true); - } - rc = rec->play(); - if (rc != MPI_SUCCESS) { - break; - } - } - _replayOn = false; - return rc; - } - - void reset() - { - lock_t lock(_mutex); - for (MpiRecord* i : _records) { - delete i; - } - _replayOn = false; - _records.clear(); - } - - void cleanComms(dmtcp::set &staleComms) - { - bool setChanged = false; - std::function isStaleComm = - [&](const MpiRecord *rec) { - switch (rec->getType()) { - case GENERATE_ENUM(Comm_split): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(3); - if (staleComms.count(c) > 0) { - staleComms.erase(c); - staleComms.insert(newcomm); - setChanged = true; - return true; - } - return false; - } - case GENERATE_ENUM(Comm_split_type): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(4); - if (staleComms.count(c) > 0) { - staleComms.erase(c); - staleComms.insert(newcomm); - setChanged = true; - return true; - } - return false; - } - case GENERATE_ENUM(Comm_create): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(2); - if (staleComms.count(c) > 0) { - staleComms.erase(c); - staleComms.insert(newcomm); - setChanged = true; - return true; - } - return false; - } - case GENERATE_ENUM(Comm_dup): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(1); - if (staleComms.count(c) > 0) { - staleComms.erase(c); - staleComms.insert(newcomm); - setChanged = true; - return true; - } - return false; - } - case GENERATE_ENUM(Comm_set_errhandler): - case GENERATE_ENUM(Attr_put): - case GENERATE_ENUM(Attr_delete): - { - MPI_Comm c = rec->args(0); - if (staleComms.count(c) > 0) { - staleComms.erase(c); - return true; - } - return false; - } - default: - return false; - } }; - do { - mpi_record_vector_iterator_t it = - remove_if(_records.begin(), _records.end(), isStaleComm); - _records.erase(it, _records.end()); - } while (setChanged); - } - - void clearGroupLogs(MPI_Group group) - { - lock_t lock(_mutex); - dmtcp::set staleComms; - std::function isValidGroup = - [group, &staleComms](const MpiRecord *rec) { - switch (rec->getType()) { - case GENERATE_ENUM(Group_incl): - { - MPI_Group g = rec->args(0); - return group == g; - } - case GENERATE_ENUM(Comm_group): - { - // MPI_Comm comm = rec->args(0); - MPI_Group g = rec->args(1); - return group == g; - } - case GENERATE_ENUM(Comm_create): - { - // MPI_Comm comm = rec->args(0); - MPI_Group g = rec->args(1); - MPI_Comm oldcomm = rec->args(2); - if (group == g) { - staleComms.insert(oldcomm); // save this - return true; - } - return false; - } - default: - return false; - } }; - mpi_record_vector_iterator_t it = - remove_if(_records.begin(), _records.end(), isValidGroup); - _records.erase(it, _records.end()); - cleanComms(staleComms); - } - - void clearCommLogs(MPI_Comm comm) - { - lock_t lock(_mutex); - dmtcp::set staleComms; - std::function isValidComm = - [comm, &staleComms](const MpiRecord *rec) { - switch (rec->getType()) { - case GENERATE_ENUM(Comm_split): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(3); - if (c == comm) { - staleComms.insert(newcomm); - return true; - } - return false; - } - case GENERATE_ENUM(Comm_split_type): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(4); - if (c == comm) { - staleComms.insert(newcomm); - return true; - } - return false; - } - case GENERATE_ENUM(Comm_create): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(2); - if (c == comm) { - staleComms.insert(newcomm); - return true; - } - return false; - } - case GENERATE_ENUM(Comm_dup): - { - MPI_Comm c = rec->args(0); - MPI_Comm newcomm = rec->args(1); - if (c == comm) { - staleComms.insert(newcomm); - return true; - } - return false; - } - case GENERATE_ENUM(Comm_set_errhandler): - case GENERATE_ENUM(Attr_put): - case GENERATE_ENUM(Attr_delete): - { - MPI_Comm c = rec->args(0); - return staleComms.count(c) > 0; - } - default: - return false; - } }; - mpi_record_vector_iterator_t it = - remove_if(_records.begin(), _records.end(), isValidComm); - _records.erase(it, _records.end()); - cleanComms(staleComms); - } - - void removeRequestLog(MPI_Request request) - { - lock_t lock(_mutex); - auto iter = _recordsMap.find(request); - if (iter == _recordsMap.end()) { - return; - } else { - iter->second = 1; // finished - } - } - - // Returns true if we are currently replaying the MPI calls - bool isReplayOn() - { - // FIXME: This needs locking. But we can't do this here, otherwise it'll - // deadlock - return !_replayOn; - } - - void printRecords(bool print); - private: - // Pvt. constructor - MpiRecordReplay() - : _records(), - _replayOn(false), - _mutex() - { - } - - void datatype_create(MPI_Datatype datatype) - { - _datatypeMap[datatype] = 1; - } - - void datatype_incRef(int count, MPI_Datatype *datatypes) - { - MPI_Datatype type; - lock_t lock(_mutex); - if (count == 1) { - type = *datatypes; - if (_datatypeMap.find(type) != _datatypeMap.end()) { - _datatypeMap[type]++; - } - } else { - for (int i = 0; i < count; i++) { - type = datatypes[i]; - if (_datatypeMap.find(type) != _datatypeMap.end()) { - _datatypeMap[type]++; - } - } - } - } - - int datatype_decRef(MPI_Datatype datatype) { - if (_datatypeMap.find(datatype) == _datatypeMap.end()) { - return -1; - } else { - return --_datatypeMap[datatype]; - } - } - - void datatype_find_stale_types(MPI_Datatype type, - dmtcp::set &staleTypes) - { - std::function isStaleType = - [this, type, &staleTypes](const MpiRecord *rec) { - switch (rec->getType()) { - case GENERATE_ENUM(Type_hvector): - { - MPI_Datatype newtype = rec->args(4); - if (newtype == type) { - // The oldtype could be stale if its ref count drops to zero - if (this->datatype_decRef(type) == 0) { - MPI_Datatype oldtype = rec->args(3); - // Skip pre-defined MPI constants like MPI_INT, etc. - if (_datatypeMap.find(oldtype) != _datatypeMap.end()) { - datatype_find_stale_types(oldtype, staleTypes); - } - } - return true; - } - return false; - } - case GENERATE_ENUM(Type_create_struct): - { - MPI_Datatype newtype = rec->args(4); - if (newtype == type) { - if (this->datatype_decRef(type) == 0) { - int count = rec->args(0); - MPI_Datatype *oldtypes = rec->args(3); - for (int i = 0; i < count; i++) { - if (_datatypeMap.find(oldtypes[i]) != _datatypeMap.end()) { - datatype_find_stale_types(oldtypes[i], staleTypes); - } - } - return true; - } - } - return false; - } - case GENERATE_ENUM(Type_indexed): - { - MPI_Datatype newtype = rec->args(4); - if (newtype == type) { - // The oldtype could be stale if its ref count drops to zero - if (this->datatype_decRef(type) == 0) { - MPI_Datatype oldtype = rec->args(3); - // Skip pre-defined MPI constants like MPI_INT, etc. - if (_datatypeMap.find(oldtype) != _datatypeMap.end()) { - datatype_find_stale_types(oldtype, staleTypes); - } - } - return true; - } - return false; - } - case GENERATE_ENUM(Type_commit): - { - // Skip commit because the stale type is collected by - // the creator of the type - return false; - } - default: - return false; - } - }; - - for (auto it = _records.rbegin(); it != _records.rend(); it++) { - if (isStaleType(*it)) { - staleTypes.insert(type); - break; - } - } - } - - void datatype_free(MPI_Datatype datatype) - { - dmtcp::set staleTypes; - bool creator = false; - lock_t lock(_mutex); - datatype_find_stale_types(datatype, staleTypes); - std::function isEqualType = - [&staleTypes, &creator](const MpiRecord *rec) { - switch (rec->getType()) { - case GENERATE_ENUM(Type_hvector): - { - MPI_Datatype newtype = rec->args(4); - if (staleTypes.count(newtype) > 0) { - creator = true; - return true; - } - return false; - } - case GENERATE_ENUM(Type_create_struct): - { - MPI_Datatype newtype = rec->args(4); - if (staleTypes.count(newtype) > 0) { - creator = true; - return true; - } - return false; - } - case GENERATE_ENUM(Type_indexed): - { - MPI_Datatype newtype = rec->args(4); - if (staleTypes.count(newtype) > 0) { - creator = true; - return true; - } - return false; - } - case GENERATE_ENUM(Type_commit): - { - MPI_Datatype newtype = rec->args(0); - return staleTypes.count(newtype) > 0; - } - case GENERATE_ENUM(Type_free): - { - MPI_Datatype type = rec->args(0); - return staleTypes.count(type) > 0; - } - default: - return false; - } - }; - // Traverse the log in the reverse order as the latest MPI record - // is added at the end. - auto it = _records.end(); - while (it != _records.begin()) { - it--; - MpiRecord *rec = *it; - if (isEqualType(rec)) { - it = _records.erase(it); - delete rec; - if (creator) break; - } - } - for (MPI_Datatype type : staleTypes) { - _datatypeMap.erase(type); - } - } - - // Virtual Ids Table - dmtcp::vector _records; - std::unordered_map _recordsMap; //map - std::unordered_map _datatypeMap; //map - // True on restart, false otherwise - bool _replayOn; - // Lock on list - mutex_t _mutex; - }; // class MpiRecordReplay - - - // Restores the MPI communicators and returns MPI_SUCCESS on success - extern int restoreComms(MpiRecord& ); - - // Restores the MPI groups and returns MPI_SUCCESS on success - extern int restoreGroups(MpiRecord& ); - - // Restores the MPI types and returns MPI_SUCCESS on success - extern int restoreTypes(MpiRecord& ); - - // Restores the MPI cartesian communicators and returns MPI_SUCCESS on success - extern int restoreCarts(MpiRecord& ); - - // Restores the MPI ops and returns MPI_SUCCESS on success - extern int restoreOps(MpiRecord& ); - -}; // namespace dmtcp_mpi - -// Restores the MPI state by recreating the communicator, groups, types, etc. -// post restart -extern void restoreMpiLogState(); -#ifdef SINGLE_CART_REORDER -extern void setCartesianCommunicator(void *getCartesianCommunicatorFptr); -#endif -#endif // ifndef MPI_RECORD_REPLAY_H diff --git a/mpi-proxy-split/seq_num.cpp b/mpi-proxy-split/seq_num.cpp index b66e3e597..218e2f9ac 100644 --- a/mpi-proxy-split/seq_num.cpp +++ b/mpi-proxy-split/seq_num.cpp @@ -10,9 +10,7 @@ #include "seq_num.h" #include "mpi_nextfunc.h" #include "virtual-ids.h" -#include "record-replay.h" -using namespace dmtcp_mpi; using dmtcp::kvdb::KVDBRequest; using dmtcp::kvdb::KVDBResponse; @@ -112,9 +110,10 @@ int check_seq_nums(bool exclusive) { return target_reached; } +// FIXME: This is barely used. Remove? int twoPhaseCommit(MPI_Comm comm, std::functiondoRealCollectiveComm) { - if (!MPI_LOGGING() || comm == MPI_COMM_NULL) { + if (mana_state == RESTART_REPLAY || comm == MPI_COMM_NULL) { return doRealCollectiveComm(); // lambda function: already captured args } diff --git a/mpi-proxy-split/unit-test/Makefile b/mpi-proxy-split/unit-test/Makefile index 77dbf4f8f..73113fc7b 100644 --- a/mpi-proxy-split/unit-test/Makefile +++ b/mpi-proxy-split/unit-test/Makefile @@ -16,9 +16,6 @@ MPICC = mpicc MPICXX = mpic++ endif -TESTS = record-replay-comm-test \ - record-replay-group-test \ - record-replay-types-test TEST_OBJS=$(addsuffix .o, ${TESTS}) TEST_BINS=$(addsuffix .exe, ${TESTS}) @@ -51,14 +48,12 @@ DRAIN_TEST_OBJS = drain-send-recv-test.o ../p2p_drain_send_recv.cpp \ # default: ${TEST_BINS} default: ; -../record-replay.o ../drain_send_recv_packets.o: @make -C .. # NOTE: The objects files split_process.o and procmapsutils.o # below are required only when MANA is configured with SINGLE_CART_REORDER # C/C++ flag. One should remove these two obj files when we decide to remove # the SINGLE_CART_REORDER macro implementation entirely from MANA. -%.exe: %.o ../record-replay.o ../split_process.o ../lower-half/procmapsutils.o ${MPICXX} -fPIC -g3 -O0 -o $@ $^ ${TEST_LD_FLAGS} drain-send-recv-test.exe: ${DRAIN_TEST_OBJS} @@ -71,7 +66,6 @@ drain-send-recv-test.exe: ${DRAIN_TEST_OBJS} ${MPICXX} ${CXXFLAGS} -g3 -O0 -c -o $@ $< gdb-%: ${TEST_BINS} - gdb --args ./record-replay-$*-test.exe # check: ${TEST_BINS} # @for x in $^; do ${MPIRUN} -n 1 ./$$x; done diff --git a/mpi-proxy-split/unit-test/record-replay-cart-test.cpp b/mpi-proxy-split/unit-test/record-replay-cart-test.cpp index 6844d1d35..ff189e13b 100644 --- a/mpi-proxy-split/unit-test/record-replay-cart-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-cart-test.cpp @@ -3,7 +3,6 @@ #include #include -#include "record-replay.h" #include "virtual-ids.h" #undef DMTCP_PLUGIN_ENABLE_CKPT @@ -110,12 +109,8 @@ int main(int argc, char **argv) { // FIXME: This unit test has been disabled because MPI cartesian communicator - // is created at the restart step instead of the record-replay step, which // happens after the restart step. The new cartesian communicator created at - // the restart step is then passed to record-replay separately via - // setCartesianCommunicator(). Therefore, the "record-replay-cart" unit test // will result in seg fault error because variable (in - // record-replay.cpp) has not been set in this unit test. // initializeJalib(); diff --git a/mpi-proxy-split/unit-test/record-replay-comm-test.cpp b/mpi-proxy-split/unit-test/record-replay-comm-test.cpp index 33c1db4cd..5c941f6c1 100644 --- a/mpi-proxy-split/unit-test/record-replay-comm-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-comm-test.cpp @@ -2,7 +2,6 @@ #include -#include "record-replay.h" #include "virtual-ids.h" #undef DMTCP_PLUGIN_ENABLE_CKPT diff --git a/mpi-proxy-split/unit-test/record-replay-group-test.cpp b/mpi-proxy-split/unit-test/record-replay-group-test.cpp index a0dd59367..b0af16d6c 100644 --- a/mpi-proxy-split/unit-test/record-replay-group-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-group-test.cpp @@ -2,7 +2,6 @@ #include -#include "record-replay.h" #include "virtual-ids.h" #undef DMTCP_PLUGIN_ENABLE_CKPT diff --git a/mpi-proxy-split/unit-test/record-replay-types-test.cpp b/mpi-proxy-split/unit-test/record-replay-types-test.cpp index 5c92037c2..3251d238e 100644 --- a/mpi-proxy-split/unit-test/record-replay-types-test.cpp +++ b/mpi-proxy-split/unit-test/record-replay-types-test.cpp @@ -2,7 +2,6 @@ #include -#include "record-replay.h" #include "virtual-ids.h" #undef DMTCP_PLUGIN_ENABLE_CKPT From 855064468cf624bab0901e8e121dded742af8c3c Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 20:51:18 -0400 Subject: [PATCH 12/49] Added fortran conditional macros, for ExaMPI. But no makefile yet. --- .../mpi-wrappers/mpi_collective_wrappers.cpp | 6 ++++++ .../mpi-wrappers/mpi_p2p_wrappers.cpp | 12 ++++++++++++ .../mpi-wrappers/mpi_request_wrappers.cpp | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index c0fc97fc8..e7c59b8d3 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -255,7 +255,11 @@ USER_DEFINED_WRAPPER(int, Allreduce, commit_begin(comm, passthrough); int retval; DMTCP_PLUGIN_DISABLE_CKPT(); + + #ifndef MANA_EXAMPI get_fortran_constants(); + #endif + if (use_allreduce_reproducible) retval = MPI_Allreduce_reproducible(sendbuf, recvbuf, count, datatype, op, comm, 0); @@ -264,9 +268,11 @@ USER_DEFINED_WRAPPER(int, Allreduce, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. + #ifndef MANA_EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } + #endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Allreduce)(sendbuf, recvbuf, count, realType, realOp, realComm); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp index 770c9b4ff..1da55ba67 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp @@ -250,7 +250,11 @@ USER_DEFINED_WRAPPER(int, Sendrecv, (const void *) sendbuf, (int) sendcount, RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); #else + + #ifndef MANA_EXAMPI get_fortran_constants(); + #endif + MPI_Request reqs[2]; MPI_Status sts[2]; // FIXME: The send and receive need to be atomic @@ -267,9 +271,17 @@ USER_DEFINED_WRAPPER(int, Sendrecv, (const void *) sendbuf, (int) sendcount, retval = MPI_Waitall(2, reqs, sts); // Set status only when the status is neither MPI_STATUS_IGNORE nor // FORTRAN_MPI_STATUS_IGNORE + + #ifdef MANA_EXAMPI + if (status != MPI_STATUS_IGNORE) { + *status = sts[1]; + } + #else if (status != MPI_STATUS_IGNORE && status != FORTRAN_MPI_STATUS_IGNORE) { *status = sts[1]; } + #endif + if (retval == MPI_SUCCESS) { // updateLocalRecvs(); } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp index 3c3d3ed25..85fb7c3c8 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp @@ -217,12 +217,17 @@ USER_DEFINED_WRAPPER(int, Waitall, (int) count, MPI_Request *local_array_of_requests = array_of_requests; MPI_Status *local_array_of_statuses = array_of_statuses; + #ifndef MANA_EXAMPI get_fortran_constants(); + #endif + for (int i = 0; i < count; i++) { /* FIXME: Is there a chance it gets a valid C address, which we shouldn't * ignore? Ideally, we should only check FORTRAN_MPI_STATUSES_IGNORE * in the Fortran wrapper. */ + + #ifndef MANA_EXAMPI if (local_array_of_statuses != MPI_STATUSES_IGNORE && local_array_of_statuses != FORTRAN_MPI_STATUSES_IGNORE) { retval = MPI_Wait(&local_array_of_requests[i], @@ -230,9 +235,21 @@ USER_DEFINED_WRAPPER(int, Waitall, (int) count, } else { retval = MPI_Wait(&local_array_of_requests[i], MPI_STATUS_IGNORE); } + #else + if (local_array_of_statuses != MPI_STATUSES_IGNORE) { + retval = MPI_Wait(&local_array_of_requests[i], + &local_array_of_statuses[i]); + } else { + retval = MPI_Wait(&local_array_of_requests[i], MPI_STATUS_IGNORE); + } + #endif + if (retval != MPI_SUCCESS) { break; } + + + } #endif return retval; From 9aae6fe7dd8cf13249cf48fb933ba75cdfa8f0eb Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 20:51:38 -0400 Subject: [PATCH 13/49] Simplify Type_vector wrapper --- .../mpi-wrappers/mpi_type_wrappers.cpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index 782bfe24e..6e8ddacb1 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -127,15 +127,23 @@ USER_DEFINED_WRAPPER(int, Type_vector, (int) count, (int) blocklength, (int) stride, (MPI_Datatype) oldtype, (MPI_Datatype*) newtype) { - int size; - int retval = MPI_Type_size(oldtype, &size); - if(retval != MPI_SUCCESS) { - return retval; - } + int retval; + DMTCP_PLUGIN_DISABLE_CKPT(); - return MPI_Type_hvector(count, blocklength, stride*size, oldtype, newtype); + MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(oldtype); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + retval = NEXT_FUNC(Type_vector)(count, blocklength, + stride, realType, newtype); + RETURN_TO_UPPER_HALF(); + if (retval == MPI_SUCCESS) { + MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); + *newtype = virtType; + } + DMTCP_PLUGIN_ENABLE_CKPT(); + return retval; } + // int MPI_Type_create_struct(int count, // const int array_of_blocklengths[], // const MPI_Aint array_of_displacements[], From bd5540940553908b837e16ae87ef4f2c61fbd61d Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 22:20:19 -0400 Subject: [PATCH 14/49] Added MPI-naive lh_constants_map --- mpi-proxy-split/lower-half/libproxy.c | 22 ++++++++ mpi-proxy-split/lower-half/libproxy.h | 58 +++++++++++++++++++++ mpi-proxy-split/lower-half/lower_half_api.h | 16 ++++++ mpi-proxy-split/split_process.cpp | 2 + mpi-proxy-split/virtual-ids.cpp | 13 +++++ mpi-proxy-split/virtual-ids.h | 10 +++- restart_plugin/mtcp_restart_plugin.h | 2 + restart_plugin/mtcp_split_process.c | 4 +- 8 files changed, 124 insertions(+), 3 deletions(-) diff --git a/mpi-proxy-split/lower-half/libproxy.c b/mpi-proxy-split/lower-half/libproxy.c index 47ff41136..e106d33e1 100644 --- a/mpi-proxy-split/lower-half/libproxy.c +++ b/mpi-proxy-split/lower-half/libproxy.c @@ -68,8 +68,29 @@ static void* MPI_Fnc_Ptrs[] = { NULL, }; +#define INIT_CONST_MAP(const) mpi_constants[LH_##const] = const; + +static int mpi_constants_initialized = 0; +static void* mpi_constants[LH_MPI_Constant_Invalid + 1] // Local functions +void* +get_lh_mpi_constant(enum MPI_Constants constant) +{ + if (!mpi_constants_initialized) { + mpi_constants[LH_MPI_Constant_NULL] = NULL; + FOREACH_CONSTANT(INIT_CONST_MAP) + mpi_constants[LH_MPI_ERRORS_RETURN] = 0; + mpi_constants[LH_MPI_Constant_Invalid] = NULL; + mpi_constants_initialized = 1; + } + if (constant < LH_MPI_Constant_NULL || + constant > LH_MPI_Constant_Invalid) { + return NULL; + } + return mpi_constants[constant]; +} + LhCoreRegions_t* getLhRegionsList(int *num) { @@ -346,6 +367,7 @@ void first_constructor() lh_info.g_appContext = (void*)&g_appContext; lh_info.lh_dlsym = (void*)&mydlsym; lh_info.getRankFptr = (void*)&getRank; + lh_info.lh_mpi_constants = (void*)&get_lh_mpi_constant; #ifdef SINGLE_CART_REORDER lh_info.getCoordinatesFptr = (void*)&getCoordinates; diff --git a/mpi-proxy-split/lower-half/libproxy.h b/mpi-proxy-split/lower-half/libproxy.h index a45c41053..9f205e206 100644 --- a/mpi-proxy-split/lower-half/libproxy.h +++ b/mpi-proxy-split/lower-half/libproxy.h @@ -405,4 +405,62 @@ do { \ MACRO(MANA_Internal), \ MACRO(Aint_diff), +#define FOREACH_CONSTANT(MACRO) \ + MACRO(GROUP_NULL) \ + MACRO(COMM_NULL) \ + MACRO(REQUEST_NULL) \ + MACRO(OP_NULL) \ + MACRO(INFO_NULL) \ + MACRO(COMM_WORLD) \ + MACRO(COMM_SELF) \ + MACRO(GROUP_EMPTY) \ + MACRO(MAX) \ + MACRO(MIN) \ + MACRO(SUM) \ + MACRO(PROD) \ + MACRO(BAND) \ + MACRO(LOR) \ + MACRO(BOR) \ + MACRO(MAXLOC) \ + MACRO(MINLOC) \ + MACRO(DATATYPE_NULL) \ + MACRO(BYTE) \ + MACRO(PACKED) \ + MACRO(CHAR) \ + MACRO(SHORT) \ + MACRO(INT) \ + MACRO(LONG) \ + MACRO(FLOAT) \ + MACRO(DOUBLE) \ + MACRO(LONG_DOUBLE) \ + MACRO(UNSIGNED_CHAR) \ + MACRO(SIGNED_CHAR) \ + MACRO(UNSIGNED_SHORT) \ + MACRO(UNSIGNED_LONG) \ + MACRO(UNSIGNED) \ + MACRO(FLOAT_INT) \ + MACRO(DOUBLE_INT) \ + MACRO(LONG_DOUBLE_INT) \ + MACRO(LONG_INT) \ + MACRO(SHORT_INT) \ + MACRO(2INT) \ + MACRO(WCHAR) \ + MACRO(LONG_LONG_INT) \ + MACRO(LONG_LONG) \ + MACRO(UNSIGNED_LONG_LONG) \ + MACRO(INT8_T) \ + MACRO(UINT8_T) \ + MACRO(INT16_T) \ + MACRO(UINT16_T) \ + MACRO(INT32_T) \ + MACRO(UINT32_T) \ + MACRO(INT64_T) \ + MACRO(UINT64_T) \ + MACRO(AINT) \ + MACRO(CXX_BOOL) \ + MACRO(CXX_FLOAT_COMPLEX) \ + MACRO(CXX_DOUBLE_COMPLEX) \ + MACRO(CXX_LONG_DOUBLE_COMPLEX) \ + MACRO(ERRORS_RETURN) \ + #endif // define _LIBPROXY_H diff --git a/mpi-proxy-split/lower-half/lower_half_api.h b/mpi-proxy-split/lower-half/lower_half_api.h index af030eb97..240614dff 100644 --- a/mpi-proxy-split/lower-half/lower_half_api.h +++ b/mpi-proxy-split/lower-half/lower_half_api.h @@ -27,6 +27,7 @@ #include "libproxy.h" #define GENERATE_ENUM(ENUM) MPI_Fnc_##ENUM +#define GENERATE_CONSTANT_ENUM(ENUM) LH_MPI_##ENUM, #define GENERATE_FNC_PTR(FNC) &MPI_##FNC #define GENERATE_FNC_STRING(FNC) "MPI_" #FNC #define PAGE_SIZE 0x1000 @@ -93,6 +94,9 @@ typedef struct _LowerHalfInfo void *g_appContext; // Pointer to ucontext_t of upper half application (defined in the lower half) void *lh_dlsym; // Pointer to mydlsym() function in the lower half void *getRankFptr; // Pointer to getRank() function in the lower half + void *lh_mpi_constants; // Open MPI can save its MPI constants at a different + // at a different address each time. Copy to uh. + #ifdef SINGLE_CART_REORDER void *getCoordinatesFptr; // Pointer to getCoordinates() function in the lower half void *getCartesianCommunicatorFptr; // Pointer to getCartesianCommunicator() function in the lower half @@ -113,6 +117,13 @@ enum MPI_Fncs { MPI_Fnc_Invalid, }; +enum MPI_Constants { + LH_MPI_Constant_NULL, + FOREACH_CONSTANT(GENERATE_CONSTANT_ENUM) + // LH_MPI_ERRORS_RETURN, + LH_MPI_Constant_Invalid, +}; + __attribute__ ((unused)) static const char *MPI_Fnc_strings[] = { "MPI_Fnc_NULL", @@ -134,6 +145,7 @@ typedef int (*libcFptr_t) (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), void *); typedef void* (*proxyDlsym_t)(enum MPI_Fncs fnc); +typedef void* (*lh_constant_t)(enum MPI_Constants constant); typedef void* (*updateEnviron_t)(char **environ); typedef void (*resetMmappedList_t)(); typedef MmapInfo_t* (*getMmappedList_t)(int **num); @@ -148,6 +160,7 @@ extern LowerHalfInfo_t lh_info; // the transient lh_proxy process in DMTCP_EVENT_INIT. // initializeLowerHalf() will initialize this to: (proxyDlsym_t)lh_info.lh_dlsym extern proxyDlsym_t pdlsym; +extern lh_constant_t lh_mpi_constants; extern LhCoreRegions_t lh_regions_list[MAX_LH_REGIONS]; // API @@ -156,6 +169,9 @@ extern LhCoreRegions_t lh_regions_list[MAX_LH_REGIONS]; // the given enum value extern void *mydlsym(enum MPI_Fncs fnc); +// Gets the value of an MPI constant in the lower half, for these can differ. +extern void *get_lh_mpi_constant(enum MPI_Constants constant); + // Initializes the MPI library in the lower half (by calling MPI_Init()) and // returns the MPI rank of the current process extern int getRank(); diff --git a/mpi-proxy-split/split_process.cpp b/mpi-proxy-split/split_process.cpp index d78d517b7..f0b8b8986 100644 --- a/mpi-proxy-split/split_process.cpp +++ b/mpi-proxy-split/split_process.cpp @@ -58,6 +58,7 @@ static unsigned long origPhnum; static unsigned long origPhdr; LowerHalfInfo_t lh_info; proxyDlsym_t pdlsym; // initialized to (proxyDlsym_t)lh_info.lh_dlsym +lh_constant_t lh_mpi_constants; LhCoreRegions_t lh_regions_list[MAX_LH_REGIONS] = {0}; static unsigned long getStackPtr(); @@ -509,6 +510,7 @@ initializeLowerHalf() // Save the pointer to mydlsym() function in the lower half. This will be // used in all the mpi-wrappers. pdlsym = (proxyDlsym_t)lh_info.lh_dlsym; + lh_mpi_constants = (lh_constant_t)lh_info.lh_mpi_constants; // Copied from glibc source ElfW(auxv_t) *auxvec; diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index 2b6804596..afd8a5b91 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -55,6 +55,16 @@ std::map idDescriptorTable; // int ggid -> ggid_desc_t*, which contains CVC information. std::map ggidDescriptorTable; +// For best compatibility, we determine MPI constants at runtime. +std::map lh_constants_map; + +#define INIT_LH_CONST_MAP(const) lh_constants_map[MPI_##const] = REAL_CONSTANT(const); + +void init_lh_constants_map() { + FOREACH_CONSTANT(INIT_LH_CONST_MAP) + lh_constants_map[MPI_ERRORS_RETURN] = 0; +} + // dead-simple vid generation mechanism, add one to get new ids. int base = 1; int nextvId = base; @@ -597,3 +607,6 @@ void reconstruct_with_descriptors() { } destroy_g_world_group(); } + + + diff --git a/mpi-proxy-split/virtual-ids.h b/mpi-proxy-split/virtual-ids.h index 9123f5560..5d2ad4348 100644 --- a/mpi-proxy-split/virtual-ids.h +++ b/mpi-proxy-split/virtual-ids.h @@ -8,6 +8,7 @@ #include "jconvert.h" #include "split_process.h" #include "dmtcp.h" +#include "lower_half_api.h" #define CONCAT(a,b) a ## b @@ -42,6 +43,10 @@ }) #endif // ifndef NEXT_FUNC +#ifndef REAL_CONSTANT +# define REAL_CONSTANT(name) *(__typeof__(MPI_##name)*)lh_mpi_constants(LH_MPI_##name) +#endif // ifndef REAL_CONSTANT + #define DESC_TO_VIRTUAL(desc, null, real_type) \ ({ \ @@ -55,7 +60,7 @@ #define VIRTUAL_TO_REAL(id, null, real_id_type, desc_type) \ ({ \ desc_type* _VTR_tmp = VIRTUAL_TO_DESC(id, null, desc_type); \ - real_id_type _VTR_id = (_VTR_tmp == NULL) ? id : _VTR_tmp->real_id; \ + real_id_type _VTR_id = (_VTR_tmp == NULL) ? (real_id_type)lh_constants_map[id] : _VTR_tmp->real_id; \ _VTR_id; \ }) @@ -326,6 +331,7 @@ union id_desc_t { extern std::map idDescriptorTable; extern std::map ggidDescriptorTable; +extern std::map lh_constants_map; extern int base; extern int nextvId; extern MPI_Group g_world_group; @@ -376,4 +382,6 @@ void reconstruct_with_descriptors(); void destroy_g_world_group(); void write_g_world_group(); +void init_lh_constants_map(); + #endif // ifndef MPI_VIRTUAL_IDS_H diff --git a/restart_plugin/mtcp_restart_plugin.h b/restart_plugin/mtcp_restart_plugin.h index 6c3a8da1b..c57e1af34 100644 --- a/restart_plugin/mtcp_restart_plugin.h +++ b/restart_plugin/mtcp_restart_plugin.h @@ -48,6 +48,8 @@ typedef struct LowerHalfInfo void *g_appContext; void *lh_dlsym; void *getRankFptr; + void *lh_mpi_constants; + #ifdef SINGLE_CART_REORDER void *getCoordinatesFptr; void *getCartesianCommunicatorFptr; diff --git a/restart_plugin/mtcp_split_process.c b/restart_plugin/mtcp_split_process.c index 8751ed496..947eb21ac 100644 --- a/restart_plugin/mtcp_split_process.c +++ b/restart_plugin/mtcp_split_process.c @@ -371,8 +371,8 @@ initializeLowerHalf(RestoreInfo *rinfo) auxvec = (ElfW(auxv_t) *) evp; } // update vDSO linkmap entry to the temporary address - updateVdsoLinkmapEntry(rinfo->currentVdsoStart, - rinfo->pluginInfo.vdsoLdAddrInLinkMap); + // updateVdsoLinkmapEntry(rinfo->currentVdsoStart, + // rinfo->pluginInfo.vdsoLdAddrInLinkMap); JUMP_TO_LOWER_HALF(rinfo->pluginInfo.fsaddr); (*resetMmaps)(); // Set the auxiliary vector to correspond to the values of the lower half From 3275bc7713aa36d339a33fcbce30d6f1cc7562f6 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 22:32:58 -0400 Subject: [PATCH 15/49] Added usage of REAL_CONSTANT as required --- .../mpi-wrappers/mpi_request_wrappers.cpp | 32 +++++++++---------- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 8 ++--- mpi-proxy-split/p2p_drain_send_recv.cpp | 2 +- mpi-proxy-split/seq_num.cpp | 6 ++-- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp index 85fb7c3c8..17e1440cd 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp @@ -61,9 +61,9 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, (int*) flag, (MPI_Status*) status) { int retval; - if (*request == MPI_REQUEST_NULL) { + if (*request == REAL_CONSTANT(REQUEST_NULL)) { // *request might be in read-only memory. So we can't overwrite it with - // MPI_REQUEST_NULL later. + // REAL_CONSTANT(REQUEST_NULL) later. *flag = true; return MPI_SUCCESS; } @@ -77,10 +77,10 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, } MPI_Request realRequest; realRequest = VIRTUAL_TO_REAL_REQUEST(*request); - if (*request != MPI_REQUEST_NULL && realRequest == MPI_REQUEST_NULL) { + if (*request != REAL_CONSTANT(REQUEST_NULL) && realRequest == REAL_CONSTANT(REQUEST_NULL)) { *flag = 1; REMOVE_OLD_REQUEST(*request); - *request = MPI_REQUEST_NULL; + *request = REAL_CONSTANT(REQUEST_NULL); DMTCP_PLUGIN_ENABLE_CKPT(); // FIXME: We should also fill in the status return MPI_SUCCESS; @@ -90,7 +90,7 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, // Updating global counter of recv bytes // FIXME: This if statement should be merged into // clearPendingRequestFromLog() - if (*flag && *request != MPI_REQUEST_NULL + if (*flag && *request != REAL_CONSTANT(REQUEST_NULL) && g_nonblocking_calls.find(*request) != g_nonblocking_calls.end() && g_nonblocking_calls[*request]->type == IRECV_REQUEST) { int count = 0; @@ -111,7 +111,7 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, if (retval == MPI_SUCCESS && *flag && mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(*request); REMOVE_OLD_REQUEST(*request); - *request = MPI_REQUEST_NULL; + *request = REAL_CONSTANT(REQUEST_NULL); } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -179,7 +179,7 @@ USER_DEFINED_WRAPPER(int, Testany, (int) count, *local_flag = 1; *local_index = MPI_UNDEFINED; for (int i = 0; i < local_count; i++) { - if (local_array_of_requests[i] == MPI_REQUEST_NULL) { + if (local_array_of_requests[i] == REAL_CONSTANT(REQUEST_NULL)) { continue; } retval = MPI_Test(&local_array_of_requests[i], local_flag, local_status); @@ -271,11 +271,11 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, *local_index = MPI_UNDEFINED; int was_null[count] = {0}; for (int i = 0; i < count; i++) { - was_null[i] = local_array_of_requests[i] == MPI_REQUEST_NULL ? 1 : 0; + was_null[i] = local_array_of_requests[i] == REAL_CONSTANT(REQUEST_NULL) ? 1 : 0; } while (1) { for (int i = 0; i < count; i++) { - if (local_array_of_requests[i] == MPI_REQUEST_NULL) { + if (local_array_of_requests[i] == REAL_CONSTANT(REQUEST_NULL)) { if (was_null[i]) { continue; } else { @@ -293,7 +293,7 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, } if (flag) { MPI_Request *request = &local_array_of_requests[i]; - if (*request != MPI_REQUEST_NULL + if (*request != REAL_CONSTANT(REQUEST_NULL) && g_nonblocking_calls.find(*request) != g_nonblocking_calls.end() && g_nonblocking_calls[*request]->type == IRECV_REQUEST) { int count = 0; @@ -304,7 +304,7 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, MPI_Comm comm = g_nonblocking_calls[*request]->comm; int worldRank = localRankToGlobalRank(local_status->MPI_SOURCE, comm); g_recvBytesByRank[worldRank] += count * size; - } else if (*request == MPI_REQUEST_NULL) { + } else if (*request == REAL_CONSTANT(REQUEST_NULL)) { if (!was_null[i]) { *local_index = i; return retval; @@ -314,7 +314,7 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, if (mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(local_array_of_requests[i]); REMOVE_OLD_REQUEST(local_array_of_requests[i]); - local_array_of_requests[i] = MPI_REQUEST_NULL; + local_array_of_requests[i] = REAL_CONSTANT(REQUEST_NULL); } *local_index = i; @@ -334,9 +334,9 @@ USER_DEFINED_WRAPPER(int, Waitany, (int) count, USER_DEFINED_WRAPPER(int, Wait, (MPI_Request*) request, (MPI_Status*) status) { int retval; - if (*request == MPI_REQUEST_NULL) { + if (*request == REAL_CONSTANT(REQUEST_NULL)) { // *request might be in read-only memory. So we can't overwrite it with - // MPI_REQUEST_NULL later. + // REAL_CONSTANT(REQUEST_NULL) later. return MPI_SUCCESS; } int flag = 0; @@ -358,7 +358,7 @@ USER_DEFINED_WRAPPER(int, Wait, (MPI_Request*) request, (MPI_Status*) status) // Updating global counter of recv bytes // FIXME: This if statement should be merged into // clearPendingRequestFromLog() - if (flag && *request != MPI_REQUEST_NULL + if (flag && *request != REAL_CONSTANT(REQUEST_NULL) && g_nonblocking_calls.find(*request) != g_nonblocking_calls.end() && g_nonblocking_calls[*request]->type == IRECV_REQUEST) { int count = 0; @@ -381,7 +381,7 @@ USER_DEFINED_WRAPPER(int, Wait, (MPI_Request*) request, (MPI_Status*) status) if (flag && mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(*request); // Remove from g_nonblocking_calls REMOVE_OLD_REQUEST(*request); // Remove from virtual id - *request = MPI_REQUEST_NULL; + *request = REAL_CONSTANT(REQUEST_NULL); } DMTCP_PLUGIN_ENABLE_CKPT(); } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index c3634839a..ec768a252 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -66,8 +66,8 @@ USER_DEFINED_WRAPPER(int, Init, (int *) argc, (char ***) argv) { JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Init)(argc, argv); - // Create a duplicate of MPI_COMM_WORLD for internal use. - NEXT_FUNC(Comm_dup)(MPI_COMM_WORLD, &g_world_comm); + // Create a duplicate of REAL_CONSTANT(COMM_WORLD) for internal use. + NEXT_FUNC(Comm_dup)(REAL_CONSTANT(COMM_WORLD), &g_world_comm); RETURN_TO_UPPER_HALF(); recordPostMpiInitMaps(); @@ -91,8 +91,8 @@ USER_DEFINED_WRAPPER(int, Init_thread, (int *) argc, (char ***) argv, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Init_thread)(argc, argv, required, provided); - // Create a duplicate of MPI_COMM_WORLD for internal use. - NEXT_FUNC(Comm_dup)(MPI_COMM_WORLD, &g_world_comm); + // Create a duplicate of REAL_CONSTANT(COMM_WORLD) for internal use. + NEXT_FUNC(Comm_dup)(REAL_CONSTANT(COMM_WORLD), &g_world_comm); RETURN_TO_UPPER_HALF(); recordPostMpiInitMaps(); diff --git a/mpi-proxy-split/p2p_drain_send_recv.cpp b/mpi-proxy-split/p2p_drain_send_recv.cpp index 84a29b3bd..b19d720f0 100644 --- a/mpi-proxy-split/p2p_drain_send_recv.cpp +++ b/mpi-proxy-split/p2p_drain_send_recv.cpp @@ -345,7 +345,7 @@ localRankToGlobalRank(int localRank, MPI_Comm localComm) MPI_Group worldGroup, localGroup; MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(localComm); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Comm_group)(MPI_COMM_WORLD, &worldGroup); + NEXT_FUNC(Comm_group)(REAL_CONSTANT(COMM_WORLD), &worldGroup); NEXT_FUNC(Comm_group)(realComm, &localGroup); NEXT_FUNC(Group_translate_ranks)(localGroup, 1, &localRank, worldGroup, &worldRank); diff --git a/mpi-proxy-split/seq_num.cpp b/mpi-proxy-split/seq_num.cpp index 218e2f9ac..8b424458b 100644 --- a/mpi-proxy-split/seq_num.cpp +++ b/mpi-proxy-split/seq_num.cpp @@ -145,7 +145,7 @@ void seq_num_broadcast(MPI_Comm comm, unsigned long new_target) { JUMP_TO_LOWER_HALF(lh_info.fsaddr); NEXT_FUNC(Group_translate_ranks)(local_group, 1, &i, world_group, &world_rank); - NEXT_FUNC(Send)(&msg, 2, MPI_UNSIGNED_LONG, world_rank, + NEXT_FUNC(Send)(&msg, 2, REAL_CONSTANT(UNSIGNED_LONG), world_rank, 0, real_world_comm); RETURN_TO_UPPER_HALF(); #ifdef DEBUG_SEQ_NUM @@ -173,7 +173,7 @@ void commit_begin(MPI_Comm comm, bool passthrough) { unsigned long new_target[2]; MPI_Comm real_world_comm = VIRTUAL_TO_REAL_COMM(g_world_comm); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Recv)(&new_target, 2, MPI_UNSIGNED_LONG, + NEXT_FUNC(Recv)(&new_target, 2, REAL_CONSTANT(UNSIGNED_LONG), status.MPI_SOURCE, status.MPI_TAG, real_world_comm, MPI_STATUS_IGNORE); RETURN_TO_UPPER_HALF(); @@ -220,7 +220,7 @@ void commit_finish(MPI_Comm comm, bool passthrough) { unsigned long new_target[2]; MPI_Comm real_world_comm = VIRTUAL_TO_REAL_COMM(g_world_comm); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Recv)(&new_target, 2, MPI_UNSIGNED_LONG, + NEXT_FUNC(Recv)(&new_target, 2, REAL_CONSTANT(UNSIGNED_LONG), status.MPI_SOURCE, status.MPI_TAG, real_world_comm, MPI_STATUS_IGNORE); RETURN_TO_UPPER_HALF(); From 87b8d7ce22bd542b4036bb0f1253c2ba0aa59e19 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 22:38:24 -0400 Subject: [PATCH 16/49] libproxy.c fixup --- mpi-proxy-split/lower-half/libproxy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi-proxy-split/lower-half/libproxy.c b/mpi-proxy-split/lower-half/libproxy.c index e106d33e1..971727c37 100644 --- a/mpi-proxy-split/lower-half/libproxy.c +++ b/mpi-proxy-split/lower-half/libproxy.c @@ -68,7 +68,7 @@ static void* MPI_Fnc_Ptrs[] = { NULL, }; -#define INIT_CONST_MAP(const) mpi_constants[LH_##const] = const; +#define INIT_CONST_MAP(const) mpi_constants[LH_MPI_##const] = const; static int mpi_constants_initialized = 0; static void* mpi_constants[LH_MPI_Constant_Invalid + 1] From eeeb8a8f13497f184ca8eafe2a8918bfafe573ed Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Mon, 18 Sep 2023 22:50:39 -0400 Subject: [PATCH 17/49] mpi_constants fixup --- mpi-proxy-split/lower-half/libproxy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/lower-half/libproxy.c b/mpi-proxy-split/lower-half/libproxy.c index 971727c37..29d7365ce 100644 --- a/mpi-proxy-split/lower-half/libproxy.c +++ b/mpi-proxy-split/lower-half/libproxy.c @@ -68,10 +68,10 @@ static void* MPI_Fnc_Ptrs[] = { NULL, }; -#define INIT_CONST_MAP(const) mpi_constants[LH_MPI_##const] = const; +#define INIT_CONST_MAP(const) mpi_constants[LH_MPI_##const] = MPI_##const; static int mpi_constants_initialized = 0; -static void* mpi_constants[LH_MPI_Constant_Invalid + 1] +static void* mpi_constants[LH_MPI_Constant_Invalid + 1]; // Local functions void* From 0358defba2a223871cba8aaf7b5252b50fe84c5a Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 02:50:00 -0400 Subject: [PATCH 18/49] init_lh_constants_map in Init, Init_thread --- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index ec768a252..ed3e4c2b3 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -66,6 +66,9 @@ USER_DEFINED_WRAPPER(int, Init, (int *) argc, (char ***) argv) { JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Init)(argc, argv); + RETURN_TO_UPPER_HALF(); + init_lh_constants_map(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); // Create a duplicate of REAL_CONSTANT(COMM_WORLD) for internal use. NEXT_FUNC(Comm_dup)(REAL_CONSTANT(COMM_WORLD), &g_world_comm); RETURN_TO_UPPER_HALF(); @@ -91,6 +94,9 @@ USER_DEFINED_WRAPPER(int, Init_thread, (int *) argc, (char ***) argv, JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Init_thread)(argc, argv, required, provided); + RETURN_TO_UPPER_HALF(); + init_lh_constants_map(); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); // Create a duplicate of REAL_CONSTANT(COMM_WORLD) for internal use. NEXT_FUNC(Comm_dup)(REAL_CONSTANT(COMM_WORLD), &g_world_comm); RETURN_TO_UPPER_HALF(); From 37815cf999e79ef9ed9524fedba48a3a35d64611 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 02:55:06 -0400 Subject: [PATCH 19/49] REAL_CONSTANT in vids --- mpi-proxy-split/virtual-ids.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index afd8a5b91..e4c58bb73 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -91,8 +91,8 @@ int getggid(MPI_Comm comm, int worldRank, int commSize, int* rbuf) { DMTCP_PLUGIN_DISABLE_CKPT(); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Allgather)(&worldRank, 1, MPI_INT, - rbuf, 1, MPI_INT, comm); + NEXT_FUNC(Allgather)(&worldRank, 1, REAL_CONSTANT(INT), + rbuf, 1, REAL_CONSTANT(INT), comm); RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); @@ -124,7 +124,7 @@ void grant_ggid(MPI_Comm virtualComm) { DMTCP_PLUGIN_DISABLE_CKPT(); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Comm_rank)(MPI_COMM_WORLD, &worldRank); + NEXT_FUNC(Comm_rank)(REAL_CONSTANT(COMM_WORLD), &worldRank); NEXT_FUNC(Comm_size)(realComm, &commSize); NEXT_FUNC(Comm_rank)(realComm, &localRank); RETURN_TO_UPPER_HALF(); @@ -235,7 +235,7 @@ void reconstruct_with_comm_desc_t(comm_desc_t* desc) { DMTCP_PLUGIN_DISABLE_CKPT(); JUMP_TO_LOWER_HALF(lh_info.fsaddr); NEXT_FUNC(Group_incl)(g_world_group, desc->size, desc->global_ranks, &group); - NEXT_FUNC(Comm_create_group)(MPI_COMM_WORLD, group, 0, &desc->real_id); + NEXT_FUNC(Comm_create_group)(REAL_CONSTANT(COMM_WORLD), group, 0, &desc->real_id); RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); } @@ -508,7 +508,7 @@ void init_comm_world() { // FIXME: This WILL NOT WORK when moving to OpenMPI, ExaMPI, as written. // Yao, Twinkle, should apply their lh_constants_map strategy here. - comm_world->real_id = 0x84000000; + comm_world->real_id = REAL_CONSTANT(COMM_WORLD); comm_world->handle = MPI_COMM_WORLD; // FIXME: the other fields are not initialized. This is an INTERNAL // communicator descriptor, strictly for bookkeeping, not for reconstructing, @@ -523,7 +523,7 @@ void init_comm_world() { void write_g_world_group() { DMTCP_PLUGIN_DISABLE_CKPT(); JUMP_TO_LOWER_HALF(lh_info.fsaddr); - NEXT_FUNC(Comm_group)(MPI_COMM_WORLD, &g_world_group); + NEXT_FUNC(Comm_group)(REAL_CONSTANT(COMM_WORLD), &g_world_group); RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); } From f249fedc2ffc353c5caa68104cc448e1f65239f0 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 19:26:22 -0400 Subject: [PATCH 20/49] Change MANA_EXAMPI to just EXAMPI --- mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp | 4 ++-- mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp | 4 ++-- mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index e7c59b8d3..a23b38fdd 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -256,7 +256,7 @@ USER_DEFINED_WRAPPER(int, Allreduce, int retval; DMTCP_PLUGIN_DISABLE_CKPT(); - #ifndef MANA_EXAMPI + #ifndef EXAMPI get_fortran_constants(); #endif @@ -268,7 +268,7 @@ USER_DEFINED_WRAPPER(int, Allreduce, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. - #ifndef MANA_EXAMPI + #ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp index 1da55ba67..701c2095d 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp @@ -251,7 +251,7 @@ USER_DEFINED_WRAPPER(int, Sendrecv, (const void *) sendbuf, (int) sendcount, DMTCP_PLUGIN_ENABLE_CKPT(); #else - #ifndef MANA_EXAMPI + #ifndef EXAMPI get_fortran_constants(); #endif @@ -272,7 +272,7 @@ USER_DEFINED_WRAPPER(int, Sendrecv, (const void *) sendbuf, (int) sendcount, // Set status only when the status is neither MPI_STATUS_IGNORE nor // FORTRAN_MPI_STATUS_IGNORE - #ifdef MANA_EXAMPI + #ifdef EXAMPI if (status != MPI_STATUS_IGNORE) { *status = sts[1]; } diff --git a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp index 17e1440cd..c9929b57e 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp @@ -217,7 +217,7 @@ USER_DEFINED_WRAPPER(int, Waitall, (int) count, MPI_Request *local_array_of_requests = array_of_requests; MPI_Status *local_array_of_statuses = array_of_statuses; - #ifndef MANA_EXAMPI + #ifndef EXAMPI get_fortran_constants(); #endif @@ -227,7 +227,7 @@ USER_DEFINED_WRAPPER(int, Waitall, (int) count, * in the Fortran wrapper. */ - #ifndef MANA_EXAMPI + #ifndef EXAMPI if (local_array_of_statuses != MPI_STATUSES_IGNORE && local_array_of_statuses != FORTRAN_MPI_STATUSES_IGNORE) { retval = MPI_Wait(&local_array_of_requests[i], From 52835c5d3e2ea538a6d5832c2263bdae5a16f515 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 19:53:07 -0400 Subject: [PATCH 21/49] fixed one realOp definition --- mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index a23b38fdd..fa00bd03f 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -607,7 +607,7 @@ USER_DEFINED_WRAPPER(int, Scan, (const void *) sendbuf, (void *) recvbuf, if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } - MPI_Op realOp = VIRTUAL_TO_REAL_TYPE(op); + MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Scan)(sendbuf, recvbuf, count, realType, realOp, realComm); From 77ac3de9ef32974c900ed9a97f437d6d8cb407fb Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 19:59:41 -0400 Subject: [PATCH 22/49] MPI collectives EXAMPI definitions --- .../mpi-wrappers/mpi_collective_wrappers.cpp | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index fa00bd03f..d206bffb3 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -221,8 +221,13 @@ MPI_Allreduce_reproducible(const void *sendbuf, MPI_Type_size(datatype, &type_size); JASSERT(count * comm_size * type_size <= MAX_ALL_SENDBUF_SIZE); +#ifndef EXAMPI JASSERT(sendbuf != FORTRAN_MPI_IN_PLACE && sendbuf != MPI_IN_PLACE) .Text("MANA: MPI_Allreduce_reproducible: MPI_IN_PLACE not yet supported."); +#else + JASSERT(sendbuf != MPI_IN_PLACE) + .Text("MANA: MPI_Allreduce_reproducible: MPI_IN_PLACE not yet supported."); +#endif // Gather the operands from all ranks in the comm MPI_Gather(sendbuf, count, datatype, tmpbuf, count, datatype, 0, comm); @@ -256,9 +261,9 @@ USER_DEFINED_WRAPPER(int, Allreduce, int retval; DMTCP_PLUGIN_DISABLE_CKPT(); - #ifndef EXAMPI +#ifndef EXAMPI get_fortran_constants(); - #endif +#endif if (use_allreduce_reproducible) retval = MPI_Allreduce_reproducible(sendbuf, recvbuf, count, datatype, op, @@ -268,11 +273,11 @@ USER_DEFINED_WRAPPER(int, Allreduce, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. - #ifndef EXAMPI +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } - #endif +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Allreduce)(sendbuf, recvbuf, count, realType, realOp, realComm); @@ -296,9 +301,11 @@ USER_DEFINED_WRAPPER(int, Reduce, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Reduce)(sendbuf, recvbuf, count, realType, realOp, root, realComm); @@ -319,9 +326,11 @@ USER_DEFINED_WRAPPER(int, Ireduce, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Ireduce)(sendbuf, recvbuf, count, realType, realOp, root, realComm, request); @@ -350,9 +359,11 @@ USER_DEFINED_WRAPPER(int, Reduce_scatter, MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef MANA_EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Reduce_scatter)(sendbuf, recvbuf, recvcounts, realType, realOp, realComm); @@ -381,9 +392,11 @@ MPI_Alltoall_internal(const void *sendbuf, int sendcount, MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); MPI_Datatype realRecvType = VIRTUAL_TO_REAL_TYPE(recvtype); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Alltoall)(sendbuf, sendcount, realSendType, recvbuf, recvcount, realRecvType, realComm); @@ -419,9 +432,11 @@ USER_DEFINED_WRAPPER(int, Alltoallv, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -444,17 +459,21 @@ USER_DEFINED_WRAPPER(int, Gather, (const void *) sendbuf, (int) sendcount, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); MPI_Datatype realRecvType = VIRTUAL_TO_REAL_TYPE(recvtype); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Gather)(sendbuf, sendcount, realSendType, recvbuf, recvcount, realRecvType, @@ -474,9 +493,11 @@ USER_DEFINED_WRAPPER(int, Gatherv, (const void *) sendbuf, (int) sendcount, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -499,9 +520,11 @@ USER_DEFINED_WRAPPER(int, Scatter, (const void *) sendbuf, (int) sendcount, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (recvbuf == FORTRAN_MPI_IN_PLACE) { recvbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -525,9 +548,11 @@ USER_DEFINED_WRAPPER(int, Scatterv, (const void *) sendbuf, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (recvbuf == FORTRAN_MPI_IN_PLACE) { recvbuf = MPI_IN_PLACE; } +#endif EXAMPI DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -550,9 +575,11 @@ USER_DEFINED_WRAPPER(int, Allgather, (const void *) sendbuf, (int) sendcount, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -576,9 +603,11 @@ USER_DEFINED_WRAPPER(int, Allgatherv, (const void *) sendbuf, (int) sendcount, commit_begin(comm, passthrough); int retval; // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); @@ -604,9 +633,11 @@ USER_DEFINED_WRAPPER(int, Scan, (const void *) sendbuf, (void *) recvbuf, MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI if (sendbuf == FORTRAN_MPI_IN_PLACE) { sendbuf = MPI_IN_PLACE; } +#endif MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); JUMP_TO_LOWER_HALF(lh_info.fsaddr); retval = NEXT_FUNC(Scan)(sendbuf, recvbuf, count, From db2cfbe50be3e34f8793f0a64f8650db75bd8a87 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 20:10:05 -0400 Subject: [PATCH 23/49] Some OpenMPI definitions --- mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index 6e8ddacb1..be600541c 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -116,12 +116,14 @@ USER_DEFINED_WRAPPER(int, Type_hvector, (int) count, (int) blocklength, return retval; } +#ifndef OPEN_MPI USER_DEFINED_WRAPPER(int, Type_create_hvector, (int) count, (int) blocklength, (MPI_Aint) stride, (MPI_Datatype) oldtype, (MPI_Datatype*) newtype) { return MPI_Type_hvector(count, blocklength, stride, oldtype, newtype); } +#endif USER_DEFINED_WRAPPER(int, Type_vector, (int) count, (int) blocklength, (int) stride, (MPI_Datatype) oldtype, @@ -178,6 +180,7 @@ USER_DEFINED_WRAPPER(int, Type_create_struct, (int) count, return retval; } +#ifndef OPEN_MPI // Perlmutter cray_mpich both implement MPI 3.1. However, they use different // APIs. We use MPICH_NUMVERSION (3.4a2) to differentiate the cray-mpich on Cori // and Perlmuttter. This ad-hoc workaround should be removed once the cray-mpich @@ -198,7 +201,10 @@ USER_DEFINED_WRAPPER(int, Type_struct, (int) count, array_of_displacements, array_of_types, newtype ); } +#endif // ifndef OPEN_MPI + +#ifndef OPEN_MPI #if MPICH_NUMVERSION < MPICH_CALC_VERSION(3,4,0,0,2) && defined(CRAY_MPICH_VERSION) USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, (const int*) array_of_blocklengths, @@ -229,7 +235,9 @@ USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // ifndef OPEN_MPI +#ifndef OPEN_MPI USER_DEFINED_WRAPPER(int, Type_create_hindexed, (int) count, (const int*) array_of_blocklengths, (const MPI_Aint*) array_of_displacements, @@ -251,6 +259,8 @@ USER_DEFINED_WRAPPER(int, Type_create_hindexed, (int) count, #endif } +#endif // ifndef OPEN_MPI + USER_DEFINED_WRAPPER(int, Type_create_hindexed_block, (int) count, (int) blocklength, (const MPI_Aint*) array_of_displacements, @@ -393,14 +403,17 @@ PMPI_IMPL(int, MPI_Type_contiguous, int count, MPI_Datatype oldtype, PMPI_IMPL(int, MPI_Type_free, MPI_Datatype *type) PMPI_IMPL(int, MPI_Type_vector, int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype *newtype) +#ifndef OPEN_MPI PMPI_IMPL(int, MPI_Type_hvector, int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype) +#endif PMPI_IMPL(int, MPI_Type_create_hvector, int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype) PMPI_IMPL(int, MPI_Type_create_struct, int count, const int array_of_blocklengths[], const MPI_Aint array_of_displacements[], const MPI_Datatype array_of_types[], MPI_Datatype *newtype) +#ifndef OPEN_MPI #if MPICH_NUMVERSION < MPICH_CALC_VERSION(3,4,0,0,2) && defined(CRAY_MPICH_VERSION) PMPI_IMPL(int, MPI_Type_struct, int count, const int array_of_blocklengths[], const MPI_Aint array_of_displacements[], const MPI_Datatype array_of_types[], @@ -416,6 +429,7 @@ PMPI_IMPL(int, MPI_Type_hindexed, int count, int array_of_blocklengths[], MPI_Aint array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype); #endif +#endif // endif OPEN_MPI PMPI_IMPL(int, MPI_Type_size_x, MPI_Datatype type, MPI_Count *size) PMPI_IMPL(int, MPI_Type_indexed, int count, const int array_of_blocklengths[], From 252a07aaf4595c3917b413c9dfd3ed8ba3942993 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Tue, 19 Sep 2023 20:18:20 -0400 Subject: [PATCH 24/49] Programmatic per-MPI virtual-ids.cpp definition --- mpi-proxy-split/virtual-ids.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/virtual-ids.cpp b/mpi-proxy-split/virtual-ids.cpp index e4c58bb73..3f6a464c0 100644 --- a/mpi-proxy-split/virtual-ids.cpp +++ b/mpi-proxy-split/virtual-ids.cpp @@ -38,6 +38,9 @@ #define MAX_VIRTUAL_ID 999 +MPI_Comm WORLD_COMM = MPI_COMM_WORLD; +MPI_Comm NULL_COMM = MPI_COMM_NULL; + // #define DEBUG_VIDS // TODO I use an explicitly integer virtual id, which under macro @@ -84,8 +87,8 @@ int hash(int i) { // OUT: rbuf // Returns ggid. int getggid(MPI_Comm comm, int worldRank, int commSize, int* rbuf) { - if (comm == MPI_COMM_NULL || comm == MPI_COMM_WORLD) { - return comm; + if (comm == NULL_COMM || comm == WORLD_COMM) { + return (intptr_t)comm; } unsigned int ggid = 0; @@ -356,6 +359,8 @@ datatype_desc_t* init_datatype_desc_t(MPI_Datatype realType) { } void update_datatype_desc_t(datatype_desc_t* datatype) { + // 2023-09-19: ExaMPI does not support this yet, though it ought to soon. +#ifndef EXAMPI // Free the existing memory, if it is not NULL. (i.e., from an older checkpoint) free(datatype->integers); free(datatype->addresses); @@ -384,11 +389,13 @@ void update_datatype_desc_t(datatype_desc_t* datatype) { NEXT_FUNC(Type_get_contents)(datatype->real_id, datatype->num_integers, datatype->num_addresses, datatype->num_datatypes, datatype->integers, datatype->addresses, datatype->datatypes); RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); +#endif } // Reconstruction arguments taken from here: // https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report/node90.htm void reconstruct_with_datatype_desc_t(datatype_desc_t* datatype) { +#ifndef EXAMPI DMTCP_PLUGIN_DISABLE_CKPT(); JUMP_TO_LOWER_HALF(lh_info.fsaddr); switch (datatype->combiner) { @@ -451,6 +458,7 @@ void reconstruct_with_datatype_desc_t(datatype_desc_t* datatype) { NEXT_FUNC(Type_commit)(&datatype->real_id); RETURN_TO_UPPER_HALF(); DMTCP_PLUGIN_ENABLE_CKPT(); +#endif // ifndef EXAMPI } void destroy_datatype_desc_t(datatype_desc_t* datatype) { From f6730935019f94a5d5c010069d586ed755a3f272 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 21:36:24 -0400 Subject: [PATCH 25/49] Initial makefile conditional per-MPI --- mpi-proxy-split/Makefile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mpi-proxy-split/Makefile b/mpi-proxy-split/Makefile index 31bf85a48..d1f172b67 100644 --- a/mpi-proxy-split/Makefile +++ b/mpi-proxy-split/Makefile @@ -66,9 +66,19 @@ default: ${MANA_COORD_OBJS} libmana.so: ${LIBOBJS} ${WRAPPERS_SRCDIR}/libmpiwrappers.a ${CXX} -shared -fPIC -g3 -O0 -o $@ ${LIBOBJS} -Wl,--whole-archive ${WRAPPERS_SRCDIR}/libmpiwrappers.a -Wl,--no-whole-archive -mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ - mpi_unimplemented_wrappers.txt - python $^ > $@ +mpi_unimplemented_wrappers.cpp: + if mpiexec -h | grep -q 'mpich'; then \ + generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ + python $^ > $@ \ + elif mpixec -h | grep -q 'mpich'; then \ + generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ + python $^ > $@ \ + elif mpiexec -h | grep -q 'open-mpi'; then \ + generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ + python $^ > $@ \ + else \ + $(error The MPI flavor could not be identified.) + fi .c.o: ${MPICC} ${CFLAGS} -c -o $@ $< From 8e197218af558b12b0f9bb755c4824233308a03f Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 21:40:43 -0400 Subject: [PATCH 26/49] Fix a type problem a-la MPI_File --- mpi-proxy-split/virtual-ids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi-proxy-split/virtual-ids.h b/mpi-proxy-split/virtual-ids.h index 5d2ad4348..cd49fd986 100644 --- a/mpi-proxy-split/virtual-ids.h +++ b/mpi-proxy-split/virtual-ids.h @@ -60,7 +60,7 @@ #define VIRTUAL_TO_REAL(id, null, real_id_type, desc_type) \ ({ \ desc_type* _VTR_tmp = VIRTUAL_TO_DESC(id, null, desc_type); \ - real_id_type _VTR_id = (_VTR_tmp == NULL) ? (real_id_type)lh_constants_map[id] : _VTR_tmp->real_id; \ + real_id_type _VTR_id = (_VTR_tmp == NULL) ? (real_id_type)lh_constants_map[(intptr_t)id] : _VTR_tmp->real_id; \ _VTR_id; \ }) From a8fc31649dc3eb80f8ceab0809b7ced1ab48be4b Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 21:52:31 -0400 Subject: [PATCH 27/49] Improved conditional makefile --- mpi-proxy-split/Makefile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mpi-proxy-split/Makefile b/mpi-proxy-split/Makefile index d1f172b67..72d9edfa8 100644 --- a/mpi-proxy-split/Makefile +++ b/mpi-proxy-split/Makefile @@ -66,17 +66,17 @@ default: ${MANA_COORD_OBJS} libmana.so: ${LIBOBJS} ${WRAPPERS_SRCDIR}/libmpiwrappers.a ${CXX} -shared -fPIC -g3 -O0 -o $@ ${LIBOBJS} -Wl,--whole-archive ${WRAPPERS_SRCDIR}/libmpiwrappers.a -Wl,--no-whole-archive -mpi_unimplemented_wrappers.cpp: - if mpiexec -h | grep -q 'mpich'; then \ - generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ - python $^ > $@ \ - elif mpixec -h | grep -q 'mpich'; then \ - generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ - python $^ > $@ \ - elif mpiexec -h | grep -q 'open-mpi'; then \ - generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers.txt \ - python $^ > $@ \ - else \ +mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ + mpi_unimplemented_wrappers_mpich.txt \ + mpi_unimplemented_wrappers_openmpi.txt \ + mpi_unimplemented_wrappers_exampi.txt + if mpiexec -h | grep -q 'mpich'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ + elif mpixec -h | grep -q 'openmpi'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ + elif mpiexec -h | grep -q 'exampi'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ + else $(error The MPI flavor could not be identified.) fi From 35cf6d0cac2d0c451153df44125c11160f87af2f Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:14:17 -0400 Subject: [PATCH 28/49] Conditional stub generation, fixed greps --- mpi-proxy-split/Makefile | 4 ++-- mpi-proxy-split/mpi-wrappers/Makefile | 30 ++++++++++++++++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/mpi-proxy-split/Makefile b/mpi-proxy-split/Makefile index 72d9edfa8..5ad1fdbb6 100644 --- a/mpi-proxy-split/Makefile +++ b/mpi-proxy-split/Makefile @@ -72,9 +72,9 @@ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ mpi_unimplemented_wrappers_exampi.txt if mpiexec -h | grep -q 'mpich'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ - elif mpixec -h | grep -q 'openmpi'; then + elif mpixec -h | grep -q 'open-mpi'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ - elif mpiexec -h | grep -q 'exampi'; then + elif mpiexec -h | grep -q 'ExaMPI'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ else $(error The MPI flavor could not be identified.) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 57a5dea0d..485604521 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -52,8 +52,20 @@ ${LIBNAME}.a: ${LIBWRAPPER_OBJS} ar cr $@ $^ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ - mpi_unimplemented_wrappers.txt - python3 $^ > $@ + mpi_unimplemented_wrappers_mpich.txt \ + mpi_unimplemented_wrappers_openmpi.txt \ + mpi_unimplemented_wrappers_exampi.txt + if mpiexec -h | grep -q 'mpich'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ + elif mpixec -h | grep -q 'open-mpi'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ + elif mpiexec -h | grep -q 'ExaMPI'; then + python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ + else + $(error The MPI flavor could not be identified.) + fi + + .c.o: ${MPICC} ${CFLAGS} -c -o $@ $< @@ -93,15 +105,23 @@ mpi_win_wrappers.o: mpi_win_wrappers.cpp ../virtual-ids.h # We need to write to mpi_stub_wrappers.c.tmp.$$ first, in case of 'make -j'. # The parallel make would generate mpi_stub_wrappers.c 3 times, and overwrite. -mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers.txt +mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers_mpich.txt mpi_stub_wrappers_openmpi.txt mpi_stub_wrappers_exampi.txt rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ "// *** THIS FILE IS AUTO-GENERATED! DO 'make' TO UPDATE. ***" >$$tmp;\ - python3 $^ >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) + if mpiexec -h | grep -q 'mpich'; then + python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_mpich.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) + elif mpixec -h | grep -q 'open-mpi'; then + python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_openmpi.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) + elif mpiexec -h | grep -q 'ExaMPI'; then + python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_exampi.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) + else + $(error The MPI flavor could not be identified.) + fi mpi_fortran_wrappers.cpp: generate-mpi-fortran-wrappers.py \ - mpi_fortran_wrappers.txt + mpi_fortran_wrappers.txt \ rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ From 6ca98cecb273e0effb1009ff06de76cf237da24e Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:18:26 -0400 Subject: [PATCH 29/49] REVERT conditional mpi_stub_wrappers. --- mpi-proxy-split/mpi-wrappers/Makefile | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 485604521..0bd508b8b 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -57,9 +57,9 @@ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ mpi_unimplemented_wrappers_exampi.txt if mpiexec -h | grep -q 'mpich'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ - elif mpixec -h | grep -q 'open-mpi'; then + elif mpixec -h | grep -q 'openmpi'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ - elif mpiexec -h | grep -q 'ExaMPI'; then + elif mpiexec -h | grep -q 'exampi'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ else $(error The MPI flavor could not be identified.) @@ -105,20 +105,12 @@ mpi_win_wrappers.o: mpi_win_wrappers.cpp ../virtual-ids.h # We need to write to mpi_stub_wrappers.c.tmp.$$ first, in case of 'make -j'. # The parallel make would generate mpi_stub_wrappers.c 3 times, and overwrite. -mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers_mpich.txt mpi_stub_wrappers_openmpi.txt mpi_stub_wrappers_exampi.txt +mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers.txt rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ "// *** THIS FILE IS AUTO-GENERATED! DO 'make' TO UPDATE. ***" >$$tmp;\ - if mpiexec -h | grep -q 'mpich'; then - python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_mpich.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) - elif mpixec -h | grep -q 'open-mpi'; then - python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_openmpi.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) - elif mpiexec -h | grep -q 'ExaMPI'; then - python3 generate-mpi-stub-wrappers.py mpi_stub_wrappers_exampi.txt >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) - else - $(error The MPI flavor could not be identified.) - fi + python3 $^ >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) mpi_fortran_wrappers.cpp: generate-mpi-fortran-wrappers.py \ mpi_fortran_wrappers.txt \ From 2dba7c4acbbb5f8b79872454c5f88483687442ca Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:27:55 -0400 Subject: [PATCH 30/49] Conditional fortran wrappers --- mpi-proxy-split/mpi-wrappers/Makefile | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 0bd508b8b..5e27920b6 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -113,12 +113,21 @@ mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers.txt python3 $^ >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) mpi_fortran_wrappers.cpp: generate-mpi-fortran-wrappers.py \ - mpi_fortran_wrappers.txt \ + mpi_fortran_wrappers_openmpi.txt \ + mpi_fortran_wrappers_mpich.txt \ rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ "// *** THIS FILE IS AUTO-GENERATED! DO 'make' TO UPDATE. ***" >$$tmp;\ - python3 $^ >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) + if mpiexec -h | grep -q 'mpich'; then + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_mpich.txt > $@ + elif mpixec -h | grep -q 'open-mpi'; then + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_openmpi.txt > $@ + elif mpiexec -h | grep -q 'ExaMPI'; then + $(error ExaMPI has no fortran support.) + else + $(error The MPI flavor could not be identified.) + fi mpi_stub_wrappers.o: mpi_stub_wrappers.c ${MPICC} ${CFLAGS} -c -o $@ $< From 796f5c96788a0305cf31fb895e9326f2748e2845 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:32:20 -0400 Subject: [PATCH 31/49] MPI-specific wrapper files --- .../mpi_unimplemented_wrappers_exampi.txt | 176 ++++++++++++++++++ ...t => mpi_unimplemented_wrappers_mpich.txt} | 0 .../mpi_unimplemented_wrappers_openmpi.txt | 176 ++++++++++++++++++ 3 files changed, 352 insertions(+) create mode 100644 mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_exampi.txt rename mpi-proxy-split/mpi-wrappers/{mpi_unimplemented_wrappers.txt => mpi_unimplemented_wrappers_mpich.txt} (100%) create mode 100644 mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_openmpi.txt diff --git a/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_exampi.txt b/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_exampi.txt new file mode 100644 index 000000000..5899c44bb --- /dev/null +++ b/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_exampi.txt @@ -0,0 +1,176 @@ +#include ; +#include "jassert.h"; + +int MPI_Add_error_class(int *errorclass); +int MPI_Add_error_code(int errorclass, int *errorcode); +int MPI_Add_error_string(int errorcode, const char *string); +int MPI_Iallgather(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Iallgatherv(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, + const int recvcounts[], const int displs[], + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request *request); +int MPI_Ialltoall(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], + const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], + const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], + const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], + const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm); +int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], + const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], + const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Request *request); +int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm); +int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm, + MPI_Request *request); +int MPI_Buffer_attach(void *buffer, int size); +int MPI_Buffer_detach(void *buffer, int *size); +int MPI_Cancel(MPI_Request *request); +int MPI_Close_port(const char *port_name); +int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm); + +int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode); +int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm); +int MPI_Comm_disconnect(MPI_Comm *comm); +int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request); +int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm); +int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], MPI_Info info, int reorder, MPI_Comm * newcomm); +int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, int indegree, const int sources[], const int sourceweights[], int outdegree, const int destinations[], const int destweights[], MPI_Info info, int reorder, MPI_Comm *comm_dist_graph); +int MPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); +int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *inneighbors, int *outneighbors, int *weighted); +int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *erhandler); +int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used); +int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen); +int MPI_Comm_get_parent(MPI_Comm *parent); +int MPI_Comm_join(int fd, MPI_Comm *intercomm); +int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group); +int MPI_Comm_remote_size(MPI_Comm comm, int *size); +int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info); +int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name); +int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]); +int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[], const int array_of_maxprocs[], const MPI_Info array_of_info[], int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]); +int MPI_Comm_test_inter(MPI_Comm comm, int *flag); +int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); + + +int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Get_version(int *version, int *subversion); +int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int index[], const int edges[], int reorder, MPI_Comm *comm_graph); +int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges, int index[], int edges[]); +int MPI_Graph_map(MPI_Comm comm, int nnodes, const int index[], const int edges[], int *newrank); +int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors); +int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, int neighbors[]); +int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges); + +int MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); + +int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup); +int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup); +int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); + +int MPI_Info_create(MPI_Info *info); +int MPI_Info_delete(MPI_Info info, const char *key); +int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo); + +int MPI_Info_free(MPI_Info *info); +int MPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value, int *flag); +int MPI_Info_get_nkeys(MPI_Info info, int *nkeys); +int MPI_Info_get_nthkey(MPI_Info info, int n, char *key); +int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, int *flag); +int MPI_Info_set(MPI_Info info, const char *key, const char *value); +int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); +int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Is_thread_main(int *flag); +int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name); + + +int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); +int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request); + +int MPI_Op_commutative(MPI_Op op, int *commute); +int MPI_Open_port(MPI_Info info, char *port_name); + +int MPI_Pack_external(const char datarep[], const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, MPI_Aint outsize, MPI_Aint *position); +int MPI_Pack_external_size(const char datarep[], int incount, MPI_Datatype datatype, MPI_Aint *size); +int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name); +int MPI_Query_thread(int *provided); +int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); + + +int MPI_Request_free(MPI_Request *request); +int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); +int MPI_Start(MPI_Request *request); +int MPI_Startall(int count, MPI_Request array_of_requests[]); + + +int MPI_Status_set_cancelled(MPI_Status *status, int flag); +int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype, int count); +int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count count); +int MPI_Test_cancelled(const MPI_Status *status, int *flag); +int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); + +int MPI_Type_create_darray(int size, int rank, int ndims, const int gsize_array[], const int distrib_array[], const int darg_array[], const int psize_array[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype); +int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype); +int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype); +int MPI_Type_create_indexed_block(int count, int blocklength, const int array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_create_subarray(int ndims, const int size_array[], const int subsize_array[], const int start_array[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_delete_attr(MPI_Datatype type, int type_keyval); +int MPI_Type_free_keyval(int *type_keyval); + +int MPI_Type_get_attr(MPI_Datatype type, int type_keyval, void *attribute_val, int *flag); +int MPI_Type_get_contents(MPI_Datatype mtype, int max_integers, int max_addresses, int max_datatypes, int array_of_integers[], MPI_Aint array_of_addresses[], MPI_Datatype array_of_datatypes[]); +int MPI_Type_get_envelope(MPI_Datatype type, int *num_integers, int *num_addresses, int *num_datatypes, int *combiner); +int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb, MPI_Count *extent); +int MPI_Type_get_name(MPI_Datatype type, char *type_name, int *resultlen); +int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent); +int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent); +int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type); +int MPI_Type_set_attr(MPI_Datatype type, int type_keyval, void *attr_val); +int MPI_Type_set_name(MPI_Datatype type, const char *type_name); +int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf, int outcount, MPI_Datatype datatype, MPI_Comm comm); +int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name); +int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize, MPI_Aint *position, void *outbuf, int outcount, MPI_Datatype datatype); +int MPI_Waitsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); +double MPI_Wtick(void); diff --git a/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers.txt b/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_mpich.txt similarity index 100% rename from mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers.txt rename to mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_mpich.txt diff --git a/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_openmpi.txt b/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_openmpi.txt new file mode 100644 index 000000000..5899c44bb --- /dev/null +++ b/mpi-proxy-split/mpi-wrappers/mpi_unimplemented_wrappers_openmpi.txt @@ -0,0 +1,176 @@ +#include ; +#include "jassert.h"; + +int MPI_Add_error_class(int *errorclass); +int MPI_Add_error_code(int errorclass, int *errorcode); +int MPI_Add_error_string(int errorcode, const char *string); +int MPI_Iallgather(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Iallgatherv(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, + const int recvcounts[], const int displs[], + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, + MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, + MPI_Request *request); +int MPI_Ialltoall(const void *sendbuf, int sendcount, + MPI_Datatype sendtype, void *recvbuf, int recvcount, + MPI_Datatype recvtype, MPI_Comm comm, + MPI_Request *request); +int MPI_Ialltoallv(const void *sendbuf, const int sendcounts[], + const int sdispls[], MPI_Datatype sendtype, + void *recvbuf, const int recvcounts[], + const int rdispls[], MPI_Datatype recvtype, + MPI_Comm comm, MPI_Request *request); +int MPI_Alltoallw(const void *sendbuf, const int sendcounts[], + const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], + const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm); +int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], + const int sdispls[], const MPI_Datatype sendtypes[], + void *recvbuf, const int recvcounts[], + const int rdispls[], const MPI_Datatype recvtypes[], + MPI_Comm comm, MPI_Request *request); +int MPI_Bsend(const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm); +int MPI_Bsend_init(const void *buf, int count, MPI_Datatype datatype, + int dest, int tag, MPI_Comm comm, + MPI_Request *request); +int MPI_Buffer_attach(void *buffer, int size); +int MPI_Buffer_detach(void *buffer, int *size); +int MPI_Cancel(MPI_Request *request); +int MPI_Close_port(const char *port_name); +int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm); + +int MPI_Comm_call_errhandler(MPI_Comm comm, int errorcode); +int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *newcomm); +int MPI_Comm_disconnect(MPI_Comm *comm); +int MPI_Comm_idup(MPI_Comm comm, MPI_Comm *newcomm, MPI_Request *request); +int MPI_Comm_dup_with_info(MPI_Comm comm, MPI_Info info, MPI_Comm *newcomm); +int MPI_Dist_graph_create(MPI_Comm comm_old, int n, const int nodes[], const int degrees[], const int targets[], const int weights[], MPI_Info info, int reorder, MPI_Comm * newcomm); +int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, int indegree, const int sources[], const int sourceweights[], int outdegree, const int destinations[], const int destweights[], MPI_Info info, int reorder, MPI_Comm *comm_dist_graph); +int MPI_Dist_graph_neighbors(MPI_Comm comm, int maxindegree, int sources[], int sourceweights[], int maxoutdegree, int destinations[], int destweights[]); +int MPI_Dist_graph_neighbors_count(MPI_Comm comm, int *inneighbors, int *outneighbors, int *weighted); +int MPI_Comm_get_errhandler(MPI_Comm comm, MPI_Errhandler *erhandler); +int MPI_Comm_get_info(MPI_Comm comm, MPI_Info *info_used); +int MPI_Comm_get_name(MPI_Comm comm, char *comm_name, int *resultlen); +int MPI_Comm_get_parent(MPI_Comm *parent); +int MPI_Comm_join(int fd, MPI_Comm *intercomm); +int MPI_Comm_remote_group(MPI_Comm comm, MPI_Group *group); +int MPI_Comm_remote_size(MPI_Comm comm, int *size); +int MPI_Comm_set_info(MPI_Comm comm, MPI_Info info); +int MPI_Comm_set_name(MPI_Comm comm, const char *comm_name); +int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info info, int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]); +int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_of_argv[], const int array_of_maxprocs[], const MPI_Info array_of_info[], int root, MPI_Comm comm, MPI_Comm *intercomm, int array_of_errcodes[]); +int MPI_Comm_test_inter(MPI_Comm comm, int *flag); +int MPI_Exscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Iexscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); + + +int MPI_Igather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Igatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Get_version(int *version, int *subversion); +int MPI_Graph_create(MPI_Comm comm_old, int nnodes, const int index[], const int edges[], int reorder, MPI_Comm *comm_graph); +int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges, int index[], int edges[]); +int MPI_Graph_map(MPI_Comm comm, int nnodes, const int index[], const int edges[], int *newrank); +int MPI_Graph_neighbors_count(MPI_Comm comm, int rank, int *nneighbors); +int MPI_Graph_neighbors(MPI_Comm comm, int rank, int maxneighbors, int neighbors[]); +int MPI_Graphdims_get(MPI_Comm comm, int *nnodes, int *nedges); + +int MPI_Group_difference(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Group_excl(MPI_Group group, int n, const int ranks[], MPI_Group *newgroup); + +int MPI_Group_intersection(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Group_range_excl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup); +int MPI_Group_range_incl(MPI_Group group, int n, int ranges[][3], MPI_Group *newgroup); +int MPI_Group_union(MPI_Group group1, MPI_Group group2, MPI_Group *newgroup); +int MPI_Ibsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); + +int MPI_Info_create(MPI_Info *info); +int MPI_Info_delete(MPI_Info info, const char *key); +int MPI_Info_dup(MPI_Info info, MPI_Info *newinfo); + +int MPI_Info_free(MPI_Info *info); +int MPI_Info_get(MPI_Info info, const char *key, int valuelen, char *value, int *flag); +int MPI_Info_get_nkeys(MPI_Info info, int *nkeys); +int MPI_Info_get_nthkey(MPI_Info info, int n, char *key); +int MPI_Info_get_valuelen(MPI_Info info, const char *key, int *valuelen, int *flag); +int MPI_Info_set(MPI_Info info, const char *key, const char *value); +int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm bridge_comm, int remote_leader, int tag, MPI_Comm *newintercomm); +int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintercomm); +int MPI_Irsend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Issend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Is_thread_main(int *flag); +int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name); + + +int MPI_Neighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_allgather(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_allgatherv(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int displs[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Ineighbor_alltoallv(const void *sendbuf, const int sendcounts[], const int sdispls[], MPI_Datatype sendtype, void *recvbuf, const int recvcounts[], const int rdispls[], MPI_Datatype recvtype, MPI_Comm comm, MPI_Request *request); +int MPI_Neighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm); +int MPI_Ineighbor_alltoallw(const void *sendbuf, const int sendcounts[], const MPI_Aint sdispls[], const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[], const MPI_Aint rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm, MPI_Request *request); + +int MPI_Op_commutative(MPI_Op op, int *commute); +int MPI_Open_port(MPI_Info info, char *port_name); + +int MPI_Pack_external(const char datarep[], const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, MPI_Aint outsize, MPI_Aint *position); +int MPI_Pack_external_size(const char datarep[], int incount, MPI_Datatype datatype, MPI_Aint *size); +int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name); +int MPI_Query_thread(int *provided); +int MPI_Recv_init(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ireduce_scatter(const void *sendbuf, void *recvbuf, const int recvcounts[], MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +int MPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Ireduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); + + +int MPI_Request_free(MPI_Request *request); +int MPI_Rsend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Iscan(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request); +int MPI_Iscatter(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Iscatterv(const void *sendbuf, const int sendcounts[], const int displs[], MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm, MPI_Request *request); +int MPI_Send_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ssend_init(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +int MPI_Ssend(const void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); +int MPI_Start(MPI_Request *request); +int MPI_Startall(int count, MPI_Request array_of_requests[]); + + +int MPI_Status_set_cancelled(MPI_Status *status, int flag); +int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype, int count); +int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count count); +int MPI_Test_cancelled(const MPI_Status *status, int *flag); +int MPI_Testsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); + +int MPI_Type_create_darray(int size, int rank, int ndims, const int gsize_array[], const int distrib_array[], const int darg_array[], const int psize_array[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_create_f90_complex(int p, int r, MPI_Datatype *newtype); +int MPI_Type_create_f90_integer(int r, MPI_Datatype *newtype); +int MPI_Type_create_f90_real(int p, int r, MPI_Datatype *newtype); +int MPI_Type_create_indexed_block(int count, int blocklength, const int array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_create_subarray(int ndims, const int size_array[], const int subsize_array[], const int start_array[], int order, MPI_Datatype oldtype, MPI_Datatype *newtype); +int MPI_Type_delete_attr(MPI_Datatype type, int type_keyval); +int MPI_Type_free_keyval(int *type_keyval); + +int MPI_Type_get_attr(MPI_Datatype type, int type_keyval, void *attribute_val, int *flag); +int MPI_Type_get_contents(MPI_Datatype mtype, int max_integers, int max_addresses, int max_datatypes, int array_of_integers[], MPI_Aint array_of_addresses[], MPI_Datatype array_of_datatypes[]); +int MPI_Type_get_envelope(MPI_Datatype type, int *num_integers, int *num_addresses, int *num_datatypes, int *combiner); +int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb, MPI_Count *extent); +int MPI_Type_get_name(MPI_Datatype type, char *type_name, int *resultlen); +int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb, MPI_Aint *true_extent); +int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb, MPI_Count *true_extent); +int MPI_Type_match_size(int typeclass, int size, MPI_Datatype *type); +int MPI_Type_set_attr(MPI_Datatype type, int type_keyval, void *attr_val); +int MPI_Type_set_name(MPI_Datatype type, const char *type_name); +int MPI_Unpack(const void *inbuf, int insize, int *position, void *outbuf, int outcount, MPI_Datatype datatype, MPI_Comm comm); +int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name); +int MPI_Unpack_external (const char datarep[], const void *inbuf, MPI_Aint insize, MPI_Aint *position, void *outbuf, int outcount, MPI_Datatype datatype); +int MPI_Waitsome(int incount, MPI_Request array_of_requests[], int *outcount, int array_of_indices[], MPI_Status array_of_statuses[]); +double MPI_Wtick(void); From e8dfb6dc85509d241d044c140165a395233dd56b Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:36:44 -0400 Subject: [PATCH 32/49] MPI-specific fortran wrappers --- ...ers.txt => mpi_fortran_wrappers_mpich.txt} | 0 .../mpi_fortran_wrappers_openmpi.txt | 127 ++++++++++++++++++ 2 files changed, 127 insertions(+) rename mpi-proxy-split/mpi-wrappers/{mpi_fortran_wrappers.txt => mpi_fortran_wrappers_mpich.txt} (100%) create mode 100644 mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_openmpi.txt diff --git a/mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers.txt b/mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_mpich.txt similarity index 100% rename from mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers.txt rename to mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_mpich.txt diff --git a/mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_openmpi.txt b/mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_openmpi.txt new file mode 100644 index 000000000..841eee847 --- /dev/null +++ b/mpi-proxy-split/mpi-wrappers/mpi_fortran_wrappers_openmpi.txt @@ -0,0 +1,127 @@ +#include ; + +#include "dmtcp.h"; +#include "jassert.h"; + +int MPI_Finalize(); +int MPI_Finalized(int* flag); +int MPI_Get_processor_name(char* name, int* resultlen); +double MPI_Wtime(); +int MPI_Initialized(int* flag); +int MPI_Get_count(const MPI_Status* status, MPI_Datatype datatype, int* count); + +int MPI_Bcast(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm); +int MPI_Barrier(MPI_Comm comm); +int MPI_Allreduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Reduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +int MPI_Reduce_local(const void* inbuf, void* inoutbuf, int count, MPI_Datatype datatype, MPI_Op op); +int MPI_Reduce_scatter(const void* sendbuf, void* recvbuf, const int* recvcounts, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); +int MPI_Alltoall(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Alltoallv(const void* sendbuf, const int* sendcounts, const int* sdispls, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, const int* rdispls, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Allgather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Allgatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcount, const int* displs, MPI_Datatype recvtype, MPI_Comm comm); +int MPI_Gather(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); +int MPI_Gatherv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, const int* recvcounts, const int* displs, MPI_Datatype recvtype, int root, MPI_Comm comm); +int MPI_Scatter(const void* sendbuf, int sendcount, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); +int MPI_Scatterv(const void* sendbuf, const int* sendcounts, const int* displs, MPI_Datatype sendtype, void* recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); +int MPI_Scan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); + +int MPI_Comm_size(MPI_Comm comm, int* world_size); +int MPI_Comm_rank(MPI_Comm comm, int* world_rank); +int MPI_Abort(MPI_Comm comm, int errorcode); +int MPI_Comm_split(MPI_Comm comm, int color, int key, MPI_Comm* newcomm); +int MPI_Comm_dup(MPI_Comm comm, MPI_Comm* newcomm); +int MPI_Comm_create(MPI_Comm comm, MPI_Group group, MPI_Comm* newcomm); +int MPI_Comm_compare(MPI_Comm comm1, MPI_Comm comm2, int* result); +int MPI_Comm_free(MPI_Comm* comm); +int MPI_Comm_set_errhandler(MPI_Comm comm, MPI_Errhandler errhandler); +int MPI_Topo_test(MPI_Comm comm, int* status); +int MPI_Comm_split_type(MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm* newcomm); +int MPI_Comm_create_keyval(MPI_Comm_copy_attr_function* comm_copy_attr_fn, MPI_Comm_delete_attr_function* comm_delete_attr_fn, int* comm_keyval, void* extra_state); +int MPI_Comm_free_keyval(int* comm_keyval); +int MPI_Comm_create_group(MPI_Comm comm, MPI_Group group, int tag, MPI_Comm* newcomm); + +int MPI_Cart_coords(MPI_Comm comm, int rank, int maxdims, int* coords); +int MPI_Cart_create(MPI_Comm old_comm, int ndims, const int* dims, const int* periods, int reorder, MPI_Comm* comm_cart); +int MPI_Cart_get(MPI_Comm comm, int maxdims, int* dims, int* periods, int* coords); +int MPI_Cart_map(MPI_Comm comm, int ndims, const int* dims, const int* periods, int* newrank); +int MPI_Cart_rank(MPI_Comm comm, const int* coords, int* rank); +int MPI_Cart_shift(MPI_Comm comm, int direction, int disp, int* rank_source, int* rank_dest); +int MPI_Cart_sub(MPI_Comm comm, const int* remain_dims, MPI_Comm* new_comm); +int MPI_Cartdim_get(MPI_Comm comm, int* ndims); +int MPI_Dims_create(int nnodes, int ndims, int* dims); + +int MPI_Test(MPI_Request* request, int* flag, MPI_Status* status); +int MPI_Wait(MPI_Request* request, MPI_Status* status); +int MPI_Iprobe(int source, int tag, MPI_Comm comm, int* flag, MPI_Status* status); +int MPI_Probe(int source, int tag, MPI_Comm comm, MPI_Status* status); +int MPI_Waitall(int count, MPI_Request* array_of_requests, MPI_Status* array_of_statuses); +int MPI_Waitany(int count, MPI_Request* array_of_requests, int* index, MPI_Status* status); +int MPI_Testall(int count, MPI_Request* array_of_requests, int* flag, MPI_Status* array_of_statuses); +int MPI_Testany(int count, MPI_Request* array_of_requests, int* index, int* flag, MPI_Status* status); + +int MPI_Comm_group(MPI_Comm comm, MPI_Group* group); +int MPI_Group_size(MPI_Group group, int* size); +int MPI_Group_free(MPI_Group* group); +int MPI_Group_compare(MPI_Group group1, MPI_Group group2, int* result); +int MPI_Group_rank(MPI_Group group, int* rank); +int MPI_Group_incl(MPI_Group group, int n, const int* ranks, MPI_Group* newgroup); + +int MPI_Type_size(MPI_Datatype datatype, int* size); +int MPI_Type_commit(MPI_Datatype* type); +int MPI_Type_contiguous(int count, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_free(MPI_Datatype* type); +int MPI_Type_vector(int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_create_struct(int count, const int* array_of_blocklengths, const MPI_Aint* array_of_displacements, MPI_Datatype* array_of_types, MPI_Datatype* newtype); +int MPI_Type_indexed(int count, const int* array_of_blocklengths, const int* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_get_extent(MPI_Datatype type, MPI_Aint* lb, MPI_Aint* extent); +int MPI_Type_create_hvector(int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_create_hindexed(int count, const int* array_of_blocklengths, const MPI_Aint* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_create_hindexed_block(int count, int blocklength, const MPI_Aint* array_of_displacements, MPI_Datatype oldtype, MPI_Datatype* newtype); +int MPI_Type_create_resized(MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype* newtype); +int MPI_Type_dup(MPI_Datatype type, MPI_Datatype* newtype); +int MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm, int* size); +int MPI_Pack(const void* inbuf, int incount, MPI_Datatype datatype, void* outbuf, int outsize, int* position, MPI_Comm comm); + +int MPI_Op_create(MPI_User_function* user_fn, int commute, MPI_Op* op); +int MPI_Op_free(MPI_Op* op); + +int MPI_Send(const void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); +int MPI_Isend(const void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request* request); +int MPI_Recv(void* buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status* status); +int MPI_Irecv(void* buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request* request); +int MPI_Sendrecv(const void* sendbuf, int sendcount, MPI_Datatype sendtype, int dest, int sendtag, void* recvbuf, int recvcount, MPI_Datatype recvtype, int source, int recvtag, MPI_Comm comm, MPI_Status* status); +int MPI_Sendrecv_replace(void* buf, int count, MPI_Datatype datatype, int dest, int sendtag, int source, int recvtag, MPI_Comm comm, MPI_Status* status); +int MPI_Rsend(const void* ibuf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); +int MPI_Ibarrier(MPI_Comm comm, MPI_Request* request); +int MPI_Ibcast(void* buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request* request); +int MPI_Ireduce(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm, MPI_Request* request); + +int MPI_Group_translate_ranks(MPI_Group group1, int n, const int* ranks1, MPI_Group group2, int* ranks2); +int MPI_Alloc_mem(MPI_Aint size, MPI_Info info, void* baseptr); +int MPI_Free_mem(void* base); + +int MPI_Error_string(int errorcode, char* string, int* resultlen); + +int MPI_File_open(MPI_Comm comm, const char* filename, int amode, MPI_Info info, MPI_File* fh); +int MPI_File_get_atomicity(MPI_File fh, int* flag); +int MPI_File_set_atomicity(MPI_File fh, int flag); +int MPI_File_set_size(MPI_File fh, MPI_Offset size); +int MPI_File_get_size(MPI_File fh, MPI_Offset* size); +int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, const char* datarep, MPI_Info info); +int MPI_File_get_view(MPI_File fh, MPI_Offset* disp, MPI_Datatype* etype, MPI_Datatype* filetype, char* datarep); +int MPI_File_read(MPI_File fh, void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_write(MPI_File fh, const void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_write_at(MPI_File fh, MPI_Offset offset, const void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, const void* buf, int count, MPI_Datatype datatype, MPI_Status* status); +int MPI_File_sync(MPI_File fh); +int MPI_File_get_position(MPI_File fh, MPI_Offset* offset); +int MPI_File_seek(MPI_File fh, MPI_Offset offset, int whence); +int MPI_File_close(MPI_File* fh); +int MPI_File_set_errhandler(MPI_File fh, MPI_Errhandler errhandler); +int MPI_File_get_errhandler(MPI_File fh, MPI_Errhandler* errhandler); +int MPI_File_delete(const char* filename, MPI_Info info); +int MPI_Get_library_version(char* version, int* resultlen); +int MPI_Get_address(const void* location, MPI_Aint* address); From e41752981bd65bd3fb67f60705ecd2c5834cc5ce Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 22:38:03 -0400 Subject: [PATCH 33/49] Remove extra tokens at endif directive --- mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index d206bffb3..8381e5d1f 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -552,7 +552,7 @@ USER_DEFINED_WRAPPER(int, Scatterv, (const void *) sendbuf, if (recvbuf == FORTRAN_MPI_IN_PLACE) { recvbuf = MPI_IN_PLACE; } -#endif EXAMPI +#endif DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realSendType = VIRTUAL_TO_REAL_TYPE(sendtype); From 75f4f2ee2fd9c6a4c5c201bc593bc800104eace9 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 23:41:36 -0400 Subject: [PATCH 34/49] Fix trailing spaces --- mpi-proxy-split/mpi-wrappers/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 5e27920b6..d29eb250e 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -52,8 +52,8 @@ ${LIBNAME}.a: ${LIBWRAPPER_OBJS} ar cr $@ $^ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ - mpi_unimplemented_wrappers_mpich.txt \ - mpi_unimplemented_wrappers_openmpi.txt \ + mpi_unimplemented_wrappers_mpich.txt \ + mpi_unimplemented_wrappers_openmpi.txt \ mpi_unimplemented_wrappers_exampi.txt if mpiexec -h | grep -q 'mpich'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ From 7566a80c8978411edcc68bcfcbe59d93f9cf4384 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Wed, 20 Sep 2023 23:42:44 -0400 Subject: [PATCH 35/49] Fix grep --- mpi-proxy-split/mpi-wrappers/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index d29eb250e..92e46a1e1 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -57,9 +57,9 @@ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ mpi_unimplemented_wrappers_exampi.txt if mpiexec -h | grep -q 'mpich'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ - elif mpixec -h | grep -q 'openmpi'; then + elif mpixec -h | grep -q 'open-mpi'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ - elif mpiexec -h | grep -q 'exampi'; then + elif mpiexec -h | grep -q 'ExaMPI'; then python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ else $(error The MPI flavor could not be identified.) From 24a86b9fe814b1e8f4511d26a494d26e6f596035 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 00:02:42 -0400 Subject: [PATCH 36/49] Fixup multiline --- mpi-proxy-split/mpi-wrappers/Makefile | 37 +++++++++++++-------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 92e46a1e1..97f3b18b8 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -52,20 +52,19 @@ ${LIBNAME}.a: ${LIBWRAPPER_OBJS} ar cr $@ $^ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ - mpi_unimplemented_wrappers_mpich.txt \ - mpi_unimplemented_wrappers_openmpi.txt \ - mpi_unimplemented_wrappers_exampi.txt - if mpiexec -h | grep -q 'mpich'; then - python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@ - elif mpixec -h | grep -q 'open-mpi'; then - python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@ - elif mpiexec -h | grep -q 'ExaMPI'; then - python generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@ - else - $(error The MPI flavor could not be identified.) - fi - - + mpi_unimplemented_wrappers_mpich.txt \ + mpi_unimplemented_wrappers_openmpi.txt \ + mpi_unimplemented_wrappers_exampi.txt + + if mpiexec -h | grep -q 'mpich'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@; \ + elif mpiexec -h | grep -q 'open-mpi'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@; \ + elif mpiexec -h | grep -q 'ExaMPI'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@; \ + else \ + $(error Could not identify the MPICH flavor); \ + fi .c.o: ${MPICC} ${CFLAGS} -c -o $@ $< @@ -114,19 +113,19 @@ mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers.txt mpi_fortran_wrappers.cpp: generate-mpi-fortran-wrappers.py \ mpi_fortran_wrappers_openmpi.txt \ - mpi_fortran_wrappers_mpich.txt \ + mpi_fortran_wrappers_mpich.txt rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ "// *** THIS FILE IS AUTO-GENERATED! DO 'make' TO UPDATE. ***" >$$tmp;\ if mpiexec -h | grep -q 'mpich'; then - python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_mpich.txt > $@ + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_mpich.txt > $@; elif mpixec -h | grep -q 'open-mpi'; then - python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_openmpi.txt > $@ + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_openmpi.txt > $@; elif mpiexec -h | grep -q 'ExaMPI'; then - $(error ExaMPI has no fortran support.) + $(error ExaMPI has no fortran support.); else - $(error The MPI flavor could not be identified.) + $(error The MPI flavor could not be identified.); fi mpi_stub_wrappers.o: mpi_stub_wrappers.c From 4f1a3681c4d2a31c909e9d5901986b7bfc170cb6 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 00:14:11 -0400 Subject: [PATCH 37/49] Change error to exit --- mpi-proxy-split/mpi-wrappers/Makefile | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index 97f3b18b8..decfaaede 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -52,19 +52,19 @@ ${LIBNAME}.a: ${LIBWRAPPER_OBJS} ar cr $@ $^ mpi_unimplemented_wrappers.cpp: generate-mpi-unimplemented-wrappers.py \ - mpi_unimplemented_wrappers_mpich.txt \ + mpi_unimplemented_wrappers_mpich.txt \ mpi_unimplemented_wrappers_openmpi.txt \ mpi_unimplemented_wrappers_exampi.txt - - if mpiexec -h | grep -q 'mpich'; then \ - python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@; \ - elif mpiexec -h | grep -q 'open-mpi'; then \ - python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@; \ - elif mpiexec -h | grep -q 'ExaMPI'; then \ - python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@; \ - else \ - $(error Could not identify the MPICH flavor); \ - fi + if mpiexec -h | grep -q 'mpich'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_mpich.txt > $@; \ + elif mpiexec -h | grep -q 'open-mpi'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_openmpi.txt > $@; \ + elif mpiexec -h | grep -q 'ExaMPI'; then \ + python3 generate-mpi-unimplemented-wrappers.py mpi_unimplemented_wrappers_exampi.txt > $@; \ + else \ + echo 'Could not identify the MPICH flavor'; \ + exit 1; \ + fi .c.o: ${MPICC} ${CFLAGS} -c -o $@ $< From dd2d5ede8acb7f326d794e8f3b655a763b1ff481 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 00:18:28 -0400 Subject: [PATCH 38/49] Fix fortran makefile --- mpi-proxy-split/mpi-wrappers/Makefile | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/Makefile b/mpi-proxy-split/mpi-wrappers/Makefile index decfaaede..6afb5747c 100644 --- a/mpi-proxy-split/mpi-wrappers/Makefile +++ b/mpi-proxy-split/mpi-wrappers/Makefile @@ -112,20 +112,22 @@ mpi_stub_wrappers.c: generate-mpi-stub-wrappers.py mpi_stub_wrappers.txt python3 $^ >> $$tmp && mv -f $$tmp $@ || (rm -f $$tmp && false) mpi_fortran_wrappers.cpp: generate-mpi-fortran-wrappers.py \ - mpi_fortran_wrappers_openmpi.txt \ + mpi_fortran_wrappers_openmpi.txt \ mpi_fortran_wrappers_mpich.txt rm -f $@ tmp=$@.tmp.$$$$ ; \ printf "%s\n\n" \ "// *** THIS FILE IS AUTO-GENERATED! DO 'make' TO UPDATE. ***" >$$tmp;\ - if mpiexec -h | grep -q 'mpich'; then - python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_mpich.txt > $@; - elif mpixec -h | grep -q 'open-mpi'; then - python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_openmpi.txt > $@; - elif mpiexec -h | grep -q 'ExaMPI'; then - $(error ExaMPI has no fortran support.); - else - $(error The MPI flavor could not be identified.); + if mpiexec -h | grep -q 'mpich'; then \ + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_mpich.txt > $@; \ + elif mpixec -h | grep -q 'open-mpi'; then \ + python generate-mpi-fortran-wrappers.py mpi_fortran_wrappers_openmpi.txt > $@; \ + elif mpiexec -h | grep -q 'ExaMPI'; then \ + echo 'ExaMPI has no fortran support.'; \ + exit 1; \ + else \ + echo 'The MPI flavor could not be identified.'; \ + exit 1; \ fi mpi_stub_wrappers.o: mpi_stub_wrappers.c From 5aacf1f6b9c9c8916497dd890d088056d18e2186 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 00:34:50 -0400 Subject: [PATCH 39/49] Conditional libproxy.h using header file --- mpi-proxy-split/lower-half/libproxy.h | 294 ++++++++++++++++++++++++++ 1 file changed, 294 insertions(+) diff --git a/mpi-proxy-split/lower-half/libproxy.h b/mpi-proxy-split/lower-half/libproxy.h index 9f205e206..d05c1378e 100644 --- a/mpi-proxy-split/lower-half/libproxy.h +++ b/mpi-proxy-split/lower-half/libproxy.h @@ -40,6 +40,7 @@ do { \ #define INFO 2 // Informational logs #define ERROR 1 // Highest error/exception level +#if defined(MPICH) #define FOREACH_FNC(MACRO) \ MACRO(Init), \ MACRO(Finalize), \ @@ -404,6 +405,299 @@ do { \ MACRO(Wtime), \ MACRO(MANA_Internal), \ MACRO(Aint_diff), +#elif defined(OPEN_MPI) +#define FOREACH_FNC(MACRO) \ + MACRO(Init), \ + MACRO(Finalize), \ + MACRO(Send), \ + MACRO(Recv), \ + MACRO(Type_size), \ + MACRO(Iprobe), \ + MACRO(Get_count), \ + MACRO(Isend), \ + MACRO(Irecv), \ + MACRO(Wait), \ + MACRO(Test), \ + MACRO(Bcast), \ + MACRO(Abort), \ + MACRO(Barrier), \ + MACRO(Reduce), \ + MACRO(Allreduce), \ + MACRO(Alltoall), \ + MACRO(Alltoallv), \ + MACRO(Comm_split), \ + MACRO(Add_error_class), \ + MACRO(Add_error_code), \ + MACRO(Add_error_string), \ + MACRO(Allgather), \ + MACRO(Iallgather), \ + MACRO(Allgatherv), \ + MACRO(Iallgatherv), \ + MACRO(Iallreduce), \ + MACRO(Ialltoall), \ + MACRO(Ialltoallv), \ + MACRO(Alltoallw), \ + MACRO(Ialltoallw), \ + MACRO(Bsend), \ + MACRO(Ibcast), \ + MACRO(Bsend_init), \ + MACRO(Cancel), \ + MACRO(Cart_coords), \ + MACRO(Cart_create), \ + MACRO(Cart_get), \ + MACRO(Cart_rank), \ + MACRO(Cart_shift), \ + MACRO(Cart_sub), \ + MACRO(Comm_compare), \ + MACRO(Comm_create_group), \ + MACRO(Comm_create), \ + MACRO(Comm_dup), \ + MACRO(Comm_free), \ + MACRO(Comm_get_name), \ + MACRO(Comm_group), \ + MACRO(Comm_rank), \ + MACRO(Comm_remote_group), \ + MACRO(Comm_remote_size), \ + MACRO(Comm_set_errhandler), \ + MACRO(Comm_set_name), \ + MACRO(Comm_size), \ + MACRO(Comm_split_type), \ + MACRO(Comm_test_inter), \ + MACRO(Error_class), \ + MACRO(Error_string), \ + MACRO(Exscan), \ + MACRO(Iexscan), \ + MACRO(Finalized), \ + MACRO(Gather), \ + MACRO(Igather), \ + MACRO(Gatherv), \ + MACRO(Igatherv), \ + MACRO(Get_address), \ + MACRO(Get_library_version), \ + MACRO(Get_processor_name), \ + MACRO(Get_version), \ + MACRO(Group_compare), \ + MACRO(Group_difference), \ + MACRO(Group_excl), \ + MACRO(Group_free), \ + MACRO(Group_incl), \ + MACRO(Group_intersection), \ + MACRO(Group_rank), \ + MACRO(Group_size), \ + MACRO(Group_translate_ranks), \ + MACRO(Group_union), \ + MACRO(Ibsend), \ + MACRO(Info_create), \ + MACRO(Info_delete), \ + MACRO(Info_dup), \ + MACRO(Info_free), \ + MACRO(Info_get), \ + MACRO(Info_get_nkeys), \ + MACRO(Info_get_nthkey), \ + MACRO(Info_get_valuelen), \ + MACRO(Info_set), \ + MACRO(Initialized), \ + MACRO(Init_thread), \ + MACRO(Irsend), \ + MACRO(Issend), \ + MACRO(Is_thread_main), \ + MACRO(Op_create), \ + MACRO(Op_free), \ + MACRO(Pack), \ + MACRO(Pack_size), \ + MACRO(Probe), \ + MACRO(Recv_init), \ + MACRO(Ireduce), \ + MACRO(Request_free), \ + MACRO(Rsend), \ + MACRO(Rsend_init), \ + MACRO(Scan), \ + MACRO(Iscan), \ + MACRO(Scatter), \ + MACRO(Iscatter), \ + MACRO(Scatterv), \ + MACRO(Iscatterv), \ + MACRO(Send_init), \ + MACRO(Sendrecv), \ + MACRO(Ssend_init), \ + MACRO(Ssend), \ + MACRO(Start), \ + MACRO(Startall), \ + MACRO(Status_set_cancelled), \ + MACRO(Testall), \ + MACRO(Testany), \ + MACRO(Test_cancelled), \ + MACRO(Testsome), \ + MACRO(Topo_test), \ + MACRO(Type_commit), \ + MACRO(Type_contiguous), \ + MACRO(Type_create_hvector), \ + MACRO(Type_create_indexed_block), \ + MACRO(Type_create_struct), \ + MACRO(Type_create_subarray), \ + MACRO(Type_create_resized), \ + MACRO(Type_free), \ + MACRO(Type_get_extent), \ + MACRO(Type_get_name), \ + MACRO(Type_indexed), \ + MACRO(Type_set_name), \ + MACRO(Type_size_x), \ + MACRO(Type_vector), \ + MACRO(Unpack), \ + MACRO(Waitall), \ + MACRO(Waitany), \ + MACRO(Waitsome), \ + MACRO(Wtick), \ + MACRO(Wtime), \ + MACRO(MANA_Internal), +#elif defined(EXAMPI) +#define FOREACH_FNC(MACRO) \ + MACRO(Init), \ + MACRO(Finalize), \ + MACRO(Send), \ + MACRO(Recv), \ + MACRO(Type_size), \ + MACRO(Iprobe), \ + MACRO(Get_count), \ + MACRO(Isend), \ + MACRO(Irecv), \ + MACRO(Wait), \ + MACRO(Test), \ + MACRO(Bcast), \ + MACRO(Abort), \ + MACRO(Barrier), \ + MACRO(Reduce), \ + MACRO(Allreduce), \ + MACRO(Alltoall), \ + MACRO(Alltoallv), \ + MACRO(Comm_split), \ + MACRO(Add_error_class), \ + MACRO(Add_error_code), \ + MACRO(Add_error_string), \ + MACRO(Allgather), \ + MACRO(Iallgather), \ + MACRO(Allgatherv), \ + MACRO(Iallgatherv), \ + MACRO(Iallreduce), \ + MACRO(Ialltoall), \ + MACRO(Ialltoallv), \ + MACRO(Alltoallw), \ + MACRO(Ialltoallw), \ + MACRO(Bsend), \ + MACRO(Ibcast), \ + MACRO(Bsend_init), \ + MACRO(Cancel), \ + MACRO(Cart_coords), \ + MACRO(Cart_create), \ + MACRO(Cart_get), \ + MACRO(Cart_rank), \ + MACRO(Cart_shift), \ + MACRO(Cart_sub), \ + MACRO(Comm_compare), \ + MACRO(Comm_create_group), \ + MACRO(Comm_create), \ + MACRO(Comm_dup), \ + MACRO(Comm_free), \ + MACRO(Comm_get_name), \ + MACRO(Comm_group), \ + MACRO(Comm_rank), \ + MACRO(Comm_remote_group), \ + MACRO(Comm_remote_size), \ + MACRO(Comm_set_errhandler), \ + MACRO(Comm_set_name), \ + MACRO(Comm_size), \ + MACRO(Comm_split_type), \ + MACRO(Comm_test_inter), \ + MACRO(Error_class), \ + MACRO(Error_string), \ + MACRO(Exscan), \ + MACRO(Iexscan), \ + MACRO(Finalized), \ + MACRO(Gather), \ + MACRO(Igather), \ + MACRO(Gatherv), \ + MACRO(Igatherv), \ + MACRO(Get_address), \ + MACRO(Get_library_version), \ + MACRO(Get_processor_name), \ + MACRO(Get_version), \ + MACRO(Group_compare), \ + MACRO(Group_difference), \ + MACRO(Group_excl), \ + MACRO(Group_free), \ + MACRO(Group_incl), \ + MACRO(Group_intersection), \ + MACRO(Group_rank), \ + MACRO(Group_size), \ + MACRO(Group_translate_ranks), \ + MACRO(Group_union), \ + MACRO(Ibsend), \ + MACRO(Info_create), \ + MACRO(Info_delete), \ + MACRO(Info_dup), \ + MACRO(Info_free), \ + MACRO(Info_get), \ + MACRO(Info_get_nkeys), \ + MACRO(Info_get_nthkey), \ + MACRO(Info_get_valuelen), \ + MACRO(Info_set), \ + MACRO(Initialized), \ + MACRO(Init_thread), \ + MACRO(Irsend), \ + MACRO(Issend), \ + MACRO(Is_thread_main), \ + MACRO(Op_create), \ + MACRO(Op_free), \ + MACRO(Pack), \ + MACRO(Pack_size), \ + MACRO(Probe), \ + MACRO(Recv_init), \ + MACRO(Ireduce), \ + MACRO(Request_free), \ + MACRO(Rsend), \ + MACRO(Rsend_init), \ + MACRO(Scan), \ + MACRO(Iscan), \ + MACRO(Scatter), \ + MACRO(Iscatter), \ + MACRO(Scatterv), \ + MACRO(Iscatterv), \ + MACRO(Send_init), \ + MACRO(Sendrecv), \ + MACRO(Ssend_init), \ + MACRO(Ssend), \ + MACRO(Start), \ + MACRO(Startall), \ + MACRO(Status_set_cancelled), \ + MACRO(Testall), \ + MACRO(Testany), \ + MACRO(Test_cancelled), \ + MACRO(Testsome), \ + MACRO(Topo_test), \ + MACRO(Type_commit), \ + MACRO(Type_contiguous), \ + MACRO(Type_create_hvector), \ + MACRO(Type_create_indexed_block), \ + MACRO(Type_create_struct), \ + MACRO(Type_create_subarray), \ + MACRO(Type_create_resized), \ + MACRO(Type_free), \ + MACRO(Type_get_extent), \ + MACRO(Type_get_name), \ + MACRO(Type_indexed), \ + MACRO(Type_set_name), \ + MACRO(Type_size_x), \ + MACRO(Type_vector), \ + MACRO(Unpack), \ + MACRO(Waitall), \ + MACRO(Waitany), \ + MACRO(Waitsome), \ + MACRO(Wtick), \ + MACRO(Wtime), \ + MACRO(MANA_Internal), +#else +#error "Could not find an MPI implementation" +#endif // ifdef MPICH elseif OPEN_MPI elseif EXAMPI #define FOREACH_CONSTANT(MACRO) \ MACRO(GROUP_NULL) \ From 1ef684fa15dbd6e396ab7fa0dd7f50ecb06462df Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 00:42:35 -0400 Subject: [PATCH 40/49] Include mpi.h --- mpi-proxy-split/lower-half/libproxy.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mpi-proxy-split/lower-half/libproxy.h b/mpi-proxy-split/lower-half/libproxy.h index d05c1378e..8647ee113 100644 --- a/mpi-proxy-split/lower-half/libproxy.h +++ b/mpi-proxy-split/lower-half/libproxy.h @@ -19,6 +19,9 @@ * . * ****************************************************************************/ +// Included to retrieve MPICH, OPEN_MPI, EXAMPI +#include + #ifndef _LIBPROXY_H #define _LIBPROXY_H From bcde8eabc9e197dc3585cd68425da180219bc79e Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 21:38:05 -0400 Subject: [PATCH 41/49] Conditional cart_wrappers --- mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp index a76396c10..20909b87a 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_cart_wrappers.cpp @@ -62,6 +62,7 @@ USER_DEFINED_WRAPPER(int, Cart_get, (MPI_Comm) comm, (int) maxdims, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Cart_map, (MPI_Comm) comm, (int) ndims, (const int*) dims, (const int*) periods, (int *) newrank) { @@ -79,6 +80,7 @@ USER_DEFINED_WRAPPER(int, Cart_map, (MPI_Comm) comm, (int) ndims, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Cart_rank, (MPI_Comm) comm, (const int*) coords, (int *) rank) @@ -120,8 +122,6 @@ USER_DEFINED_WRAPPER(int, Cart_sub, (MPI_Comm) comm, retval = NEXT_FUNC(Cart_sub)(realComm, remain_dims, new_comm); RETURN_TO_UPPER_HALF(); if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { - int ndims = 0; - MPI_Cartdim_get(comm, &ndims); MPI_Comm virtComm = ADD_NEW_COMM(*new_comm); grant_ggid(virtComm); *new_comm = virtComm; @@ -132,6 +132,7 @@ USER_DEFINED_WRAPPER(int, Cart_sub, (MPI_Comm) comm, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Cartdim_get, (MPI_Comm) comm, (int *) ndims) { int retval; @@ -154,6 +155,7 @@ USER_DEFINED_WRAPPER(int, Dims_create, (int)nnodes, (int)ndims, (int *)dims) DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) #ifdef SINGLE_CART_REORDER // This variable holds the cartesian properties and is only used at the time of @@ -263,13 +265,15 @@ PMPI_IMPL(int, MPI_Cart_create, MPI_Comm old_comm, int ndims, MPI_Comm *comm_cart) PMPI_IMPL(int, MPI_Cart_get, MPI_Comm comm, int maxdims, int dims[], int periods[], int coords[]) -PMPI_IMPL(int, MPI_Cart_map, MPI_Comm comm, int ndims, - const int dims[], const int periods[], int *newrank) PMPI_IMPL(int, MPI_Cart_rank, MPI_Comm comm, const int coords[], int *rank) PMPI_IMPL(int, MPI_Cart_shift, MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest) PMPI_IMPL(int, MPI_Cart_sub, MPI_Comm comm, const int remain_dims[], MPI_Comm *new_comm) + +#if defined(MPICH) PMPI_IMPL(int, MPI_Cartdim_get, MPI_Comm comm, int *ndims) PMPI_IMPL(int, MPI_Dims_create, int nnodes, int ndims, int *dims) - +PMPI_IMPL(int, MPI_Cart_map, MPI_Comm comm, int ndims, + const int dims[], const int periods[], int *newrank) +#endif // defined(MPICH) From 4a3254ee704c3156a14faa348a97dafa26440c5b Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 21:45:03 -0400 Subject: [PATCH 42/49] Condition collective_wrappers --- .../mpi-wrappers/mpi_collective_wrappers.cpp | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp index 8381e5d1f..0f8526e62 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_collective_wrappers.cpp @@ -145,6 +145,7 @@ USER_DEFINED_WRAPPER(int, Barrier, (MPI_Comm) comm) return retval; } +#if defined(MPICH) EXTERNC USER_DEFINED_WRAPPER(int, Ibarrier, (MPI_Comm) comm, (MPI_Request *) request) { @@ -164,6 +165,7 @@ USER_DEFINED_WRAPPER(int, Ibarrier, (MPI_Comm) comm, (MPI_Request *) request) DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) /******************************************************************************* * This version, MPI_Allreduce_reproducible, can be called from @@ -196,6 +198,7 @@ USER_DEFINED_WRAPPER(int, Ibarrier, (MPI_Comm) comm, (MPI_Request *) request) * non-deterministically. ******************************************************************************/ +#if defined(MPICH) int MPI_Allreduce_reproducible(const void *sendbuf, void *recvbuf, @@ -247,14 +250,17 @@ MPI_Allreduce_reproducible(const void *sendbuf, return rc; } +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Allreduce, (const void *) sendbuf, (void *) recvbuf, (int) count, (MPI_Datatype) datatype, (MPI_Op) op, (MPI_Comm) comm) { +#if defined(MPICH) char *s = getenv("MANA_USE_ALLREDUCE_REPRODUCIBLE"); int use_allreduce_reproducible = (s != NULL) ? atoi(s) : 0; +#endif // defined(MPICH) bool passthrough = false; commit_begin(comm, passthrough); @@ -265,6 +271,7 @@ USER_DEFINED_WRAPPER(int, Allreduce, get_fortran_constants(); #endif +#if defined(MPICH) if (use_allreduce_reproducible) retval = MPI_Allreduce_reproducible(sendbuf, recvbuf, count, datatype, op, comm, 0); @@ -283,6 +290,22 @@ USER_DEFINED_WRAPPER(int, Allreduce, NEXT_FUNC(Allreduce)(sendbuf, recvbuf, count, realType, realOp, realComm); RETURN_TO_UPPER_HALF(); } +#else // defined(MPICH) + MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); + MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); + MPI_Op realOp = VIRTUAL_TO_REAL_OP(op); + // FIXME: Ideally, check FORTRAN_MPI_IN_PLACE only in the Fortran wrapper. +#ifndef EXAMPI + if (sendbuf == FORTRAN_MPI_IN_PLACE) { + sendbuf = MPI_IN_PLACE; + } +#endif + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + retval = + NEXT_FUNC(Allreduce)(sendbuf, recvbuf, count, realType, realOp, realComm); + RETURN_TO_UPPER_HALF(); + +#endif // defined(MPICH) DMTCP_PLUGIN_ENABLE_CKPT(); commit_finish(comm, passthrough); return retval; @@ -346,6 +369,7 @@ USER_DEFINED_WRAPPER(int, Ireduce, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Reduce_scatter, (const void *) sendbuf, (void *) recvbuf, (const int) recvcounts[], (MPI_Datatype) datatype, @@ -372,6 +396,7 @@ USER_DEFINED_WRAPPER(int, Reduce_scatter, commit_finish(comm, passthrough); return retval; } +#endif // defined(MPICH) #endif // #ifndef MPI_COLLECTIVE_P2P // NOTE: This C++ function in needed by p2p_drain_send_recv.cpp @@ -700,7 +725,9 @@ PMPI_IMPL(int, MPI_Bcast, void *buffer, int count, MPI_Datatype datatype, PMPI_IMPL(int, MPI_Ibcast, void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm, MPI_Request *request) PMPI_IMPL(int, MPI_Barrier, MPI_Comm comm) +#if defined(MPICH) PMPI_IMPL(int, MPI_Ibarrier, MPI_Comm comm, MPI_Request * request) +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Allreduce, const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) PMPI_IMPL(int, MPI_Reduce, const void *sendbuf, void *recvbuf, int count, @@ -739,3 +766,5 @@ PMPI_IMPL(int, MPI_Scan, const void *sendbuf, void *recvbuf, int count, PMPI_IMPL(int, MPI_Comm_split, MPI_Comm comm, int color, int key, MPI_Comm *newcomm) PMPI_IMPL(int, MPI_Comm_dup, MPI_Comm comm, MPI_Comm *newcomm) + + From 7166f15be9a9c08385716551784d0255c5444b56 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 21:50:24 -0400 Subject: [PATCH 43/49] Conditional comm_wrappers --- .../mpi-wrappers/mpi_comm_wrappers.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp index 8671372b2..7ae6da072 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_comm_wrappers.cpp @@ -58,6 +58,7 @@ // multiple communicators can use the same attribute key for different // attributes. Thus, we use a structure like the following: // [keyval -> {[communicator -> attribute], extra_state, copy_fn, delete_fn}] +#if defined(MPICH) struct KeyvalTuple { KeyvalTuple () = default; @@ -74,9 +75,12 @@ struct KeyvalTuple { MPI_Comm_delete_attr_function *_deleteFn; std::unordered_map _attributeMap; }; +#endif // defined(MPICH) static std::vector keyvalVec; +#if defined(MPICH) static std::unordered_map tupleMap; +#endif // defined(MPICH) // The following are prvalues. So we need an address to point them to. // On Cori, this is 2^22 - 1, but it just needs to be greater than 2^15 - 1. @@ -180,6 +184,7 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) // attribute, but since our structure here is a bit different, we check, for // each key value, if a communicator/attribute value pairing exists, and if // it does then we call the callback function. +#if defined(MPICH) for (auto &tuplePair : tupleMap) { KeyvalTuple *tuple = &tuplePair.second; std::unordered_map *attributeMap = &tuple->_attributeMap; @@ -193,6 +198,7 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) attributeMap->erase(*comm); } } +#endif // defined(MPICH) DMTCP_PLUGIN_DISABLE_CKPT(); int retval = MPI_Comm_free_internal(comm); if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { @@ -209,6 +215,7 @@ USER_DEFINED_WRAPPER(int, Comm_free, (MPI_Comm *) comm) return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_get_attr, (MPI_Comm) comm, (int) comm_keyval, (void *) attribute_val, (int *) flag) { @@ -249,7 +256,9 @@ USER_DEFINED_WRAPPER(int, Comm_get_attr, (MPI_Comm) comm, } return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_set_attr, (MPI_Comm) comm, (int) comm_keyval, (void *) attribute_val) { @@ -276,7 +285,9 @@ USER_DEFINED_WRAPPER(int, Comm_set_attr, (MPI_Comm) comm, attributeMap->emplace(comm, attribute_val); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_delete_attr, (MPI_Comm) comm, (int) comm_keyval) { int retval = MPI_SUCCESS; @@ -300,6 +311,7 @@ USER_DEFINED_WRAPPER(int, Comm_delete_attr, (MPI_Comm) comm, (int) comm_keyval) } return retval; } +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_set_errhandler, (MPI_Comm) comm, (MPI_Errhandler) errhandler) @@ -348,6 +360,7 @@ USER_DEFINED_WRAPPER(int, Comm_split_type, (MPI_Comm) comm, (int) split_type, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Attr_get, (MPI_Comm) comm, (int) keyval, (void*) attribute_val, (int*) flag) { @@ -363,7 +376,9 @@ USER_DEFINED_WRAPPER(int, Attr_get, (MPI_Comm) comm, (int) keyval, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Attr_delete, (MPI_Comm) comm, (int) keyval) { @@ -381,7 +396,9 @@ USER_DEFINED_WRAPPER(int, Attr_delete, (MPI_Comm) comm, (int) keyval) DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Attr_put, (MPI_Comm) comm, (int) keyval, (void*) attribute_val) { @@ -399,7 +416,9 @@ USER_DEFINED_WRAPPER(int, Attr_put, (MPI_Comm) comm, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_create_keyval, (MPI_Comm_copy_attr_function *) comm_copy_attr_fn, (MPI_Comm_delete_attr_function *) comm_delete_attr_fn, @@ -431,7 +450,9 @@ USER_DEFINED_WRAPPER(int, Comm_create_keyval, std::unordered_map())); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Comm_free_keyval, (int *) comm_keyval) { int retval = MPI_SUCCESS; @@ -444,6 +465,7 @@ USER_DEFINED_WRAPPER(int, Comm_free_keyval, (int *) comm_keyval) } return retval; } +#endif // defined(MPICH) int MPI_Comm_create_group_internal(MPI_Comm comm, MPI_Group group, int tag, @@ -483,15 +505,18 @@ PMPI_IMPL(int, MPI_Comm_create, MPI_Comm comm, MPI_Group group, MPI_Comm *newcomm) PMPI_IMPL(int, MPI_Comm_compare, MPI_Comm comm1, MPI_Comm comm2, int *result) PMPI_IMPL(int, MPI_Comm_free, MPI_Comm *comm) +#if defined(MPICH) PMPI_IMPL(int, MPI_Comm_get_attr, MPI_Comm comm, int comm_keyval, void *attribute_val, int *flag) PMPI_IMPL(int, MPI_Comm_set_attr, MPI_Comm comm, int comm_keyval, void *attribute_val) +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Comm_set_errhandler, MPI_Comm comm, MPI_Errhandler errhandler) PMPI_IMPL(int, MPI_Topo_test, MPI_Comm comm, int* status) PMPI_IMPL(int, MPI_Comm_split_type, MPI_Comm comm, int split_type, int key, MPI_Info info, MPI_Comm *newcomm) +#if defined(MPICH) PMPI_IMPL(int, MPI_Attr_get, MPI_Comm comm, int keyval, void *attribute_val, int *flag) PMPI_IMPL(int, MPI_Attr_delete, MPI_Comm comm, int keyval) @@ -501,5 +526,6 @@ PMPI_IMPL(int, MPI_Comm_create_keyval, MPI_Comm_delete_attr_function * comm_delete_attr_fn, int *comm_keyval, void *extra_state) PMPI_IMPL(int, MPI_Comm_free_keyval, int *comm_keyval) +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Comm_create_group, MPI_Comm comm, MPI_Group group, int tag, MPI_Comm *newcomm) From a840f18ef334a95f5387cf4820b0f802a5229ae8 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 21:53:03 -0400 Subject: [PATCH 44/49] Conditional op_wrappers --- mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp index 54581d7b6..b29dac79b 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_op_wrappers.cpp @@ -75,6 +75,7 @@ USER_DEFINED_WRAPPER(int, Op_free, (MPI_Op*) op) return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Reduce_local, (const void *) inbuf, (void *) inoutbuf, (int) count, (MPI_Datatype) datatype, (MPI_Op) op) @@ -90,10 +91,14 @@ USER_DEFINED_WRAPPER(int, Reduce_local, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) + PMPI_IMPL(int, MPI_Op_create, MPI_User_function *user_fn, int commute, MPI_Op *op) PMPI_IMPL(int, MPI_Op_free, MPI_Op *op) +#if defined(MPICH) PMPI_IMPL(int, MPI_Reduce_local, const void *inbuf, void *inoutbuf, int count, MPI_Datatype datatype, MPI_Op op) +#endif // defined(MPICH) From 1d88699de72c6cb20e1abbd8eb160d5e3e4158a0 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 22:00:13 -0400 Subject: [PATCH 45/49] Conditional p2p_wrappers --- .../mpi-wrappers/mpi_p2p_wrappers.cpp | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp index 701c2095d..4121a0d35 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_p2p_wrappers.cpp @@ -32,7 +32,10 @@ #include "mpi_nextfunc.h" #include "virtual-ids.h" // To support MANA_P2P_LOG and MANA_P2P_REPLAY: + +#if defined(MPICH) #include "p2p-deterministic.h" +#endif // defined(MPICH) extern int p2p_deterministic_skip_save_request; @@ -57,9 +60,15 @@ USER_DEFINED_WRAPPER(int, Send, if (retval != MPI_SUCCESS) { return retval; } + +#if defined(MPICH) p2p_deterministic_skip_save_request = 1; retval = MPI_Wait(&req, &st); p2p_deterministic_skip_save_request = 0; +#else + retval = MPI_Wait(&req, &st); +#endif // defined(MPICH) + #endif return retval; } @@ -148,7 +157,9 @@ USER_DEFINED_WRAPPER(int, Recv, if (retval != MPI_SUCCESS) { return retval; } +#if defined(MPICH) p2p_deterministic_skip_save_request = 0; +#endif // defined(MPICH) retval = MPI_Wait(&req, status); #endif // updateLocalRecvs(); @@ -209,8 +220,8 @@ USER_DEFINED_WRAPPER(int, Irecv, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } - LOG_PRE_Irecv(&status); - REPLAY_PRE_Irecv(count,datatype,source,tag,comm); + // LOG_PRE_Irecv(&status); + // REPLAY_PRE_Irecv(count,datatype,source,tag,comm); MPI_Comm realComm = VIRTUAL_TO_REAL_COMM(comm); MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(datatype); @@ -227,7 +238,7 @@ USER_DEFINED_WRAPPER(int, Irecv, logRequestInfo(*request, IRECV_REQUEST); #endif } - LOG_POST_Irecv(source,tag,comm,&status,request,buf); + // LOG_POST_Irecv(source,tag,comm,&status,request,buf); DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } @@ -321,9 +332,15 @@ USER_DEFINED_WRAPPER(int, Sendrecv_replace, (void *) buf, (int) count, memcpy(buf, tmpbuf, count * type_size); // Set status, free buffer, and return +#ifdef EXAMPI if (status != MPI_STATUS_IGNORE && status != FORTRAN_MPI_STATUS_IGNORE) { *status = sts[0]; } +#else + if (status != MPI_STATUS_IGNORE) { + *status = sts[0]; + } +#endif // defined(EXAMPI) free(tmpbuf); return retval; From 8ce2cb2a77c6dfa6f7158dd6fc4753d738d3074b Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 22:27:57 -0400 Subject: [PATCH 46/49] Conditional request wrappers --- .../mpi-wrappers/mpi_request_wrappers.cpp | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp index c9929b57e..2d438b248 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_request_wrappers.cpp @@ -34,9 +34,9 @@ #include "mpi_nextfunc.h" #include "virtual-ids.h" // To support MANA_P2P_LOG and MANA_P2P_REPLAY: -#include "p2p-deterministic.h" +// #include "p2p-deterministic.h" -extern int p2p_deterministic_skip_save_request; +// extern int p2p_deterministic_skip_save_request; int MPI_Test_internal(MPI_Request *request, int *flag, MPI_Status *status, bool isRealRequest) @@ -67,14 +67,20 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, *flag = true; return MPI_SUCCESS; } - LOG_PRE_Test(status); + // LOG_PRE_Test(status); DMTCP_PLUGIN_DISABLE_CKPT(); MPI_Status statusBuffer; MPI_Status *statusPtr = status; - if (statusPtr == MPI_STATUS_IGNORE || +#ifdef EXAMPI + if (statusPtr == MPI_STATUS_IGNORE) { + statusPtr = &statusBuffer; + } +#else +if (statusPtr == MPI_STATUS_IGNORE || statusPtr == FORTRAN_MPI_STATUS_IGNORE) { statusPtr = &statusBuffer; } +#endif // defined(EXAMPI) MPI_Request realRequest; realRequest = VIRTUAL_TO_REAL_REQUEST(*request); if (*request != REAL_CONSTANT(REQUEST_NULL) && realRequest == REAL_CONSTANT(REQUEST_NULL)) { @@ -107,7 +113,7 @@ USER_DEFINED_WRAPPER(int, Test, (MPI_Request*) request, fflush(stdout); #endif } - LOG_POST_Test(request, statusPtr); + // LOG_POST_Test(request, statusPtr); if (retval == MPI_SUCCESS && *flag && mana_state != RESTART_REPLAY) { clearPendingRequestFromLog(*request); REMOVE_OLD_REQUEST(*request); @@ -134,6 +140,15 @@ USER_DEFINED_WRAPPER(int, Testall, (int) count, for (int i = 0; i < count; i++) { // FIXME: Ideally, we should only check FORTRAN_MPI_STATUS_IGNORE // in the Fortran wrapper. +#ifdef EXAMPI + if (local_array_of_statuses != MPI_STATUSES_IGNORE) { + retval = MPI_Test(&local_array_of_requests[i], local_flag, + &local_array_of_statuses[i]); + } else { + retval = MPI_Test(&local_array_of_requests[i], local_flag, + MPI_STATUS_IGNORE); + } +#else if (local_array_of_statuses != MPI_STATUSES_IGNORE && local_array_of_statuses != FORTRAN_MPI_STATUSES_IGNORE) { retval = MPI_Test(&local_array_of_requests[i], local_flag, @@ -142,6 +157,7 @@ USER_DEFINED_WRAPPER(int, Testall, (int) count, retval = MPI_Test(&local_array_of_requests[i], local_flag, MPI_STATUS_IGNORE); } +#endif // defined(EXAMPI) if (retval != MPI_SUCCESS) { *local_flag = 0; break; @@ -344,10 +360,16 @@ USER_DEFINED_WRAPPER(int, Wait, (MPI_Request*) request, (MPI_Status*) status) MPI_Status *statusPtr = status; // FIXME: Ideally, we should only check FORTRAN_MPI_STATUS_IGNORE // in the Fortran wrapper. +#ifndef EXAMPI if (statusPtr == MPI_STATUS_IGNORE || statusPtr == FORTRAN_MPI_STATUS_IGNORE) { statusPtr = &statusBuffer; } +#else + if (statusPtr == MPI_STATUS_IGNORE) { + statusPtr = &statusBuffer; + } +#endif // ifndef EXAMPI // FIXME: We translate the virtual request in every iteration. // We want to translate it only once, and update the real request // after restart if we checkpoint in the while loop. @@ -416,6 +438,7 @@ USER_DEFINED_WRAPPER(int, Iprobe, (int) source, (int) tag, (MPI_Comm) comm, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Request_get_status, (MPI_Request) request, (int *) flag, (MPI_Status *) status) { @@ -428,11 +451,14 @@ USER_DEFINED_WRAPPER(int, Request_get_status, (MPI_Request) request, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) +#if defined(MPICH) DEFINE_FNC(int, Get_elements, (const MPI_Status *) status, (MPI_Datatype) datatype, (int *) count); DEFINE_FNC(int, Get_elements_x, (const MPI_Status *) status, (MPI_Datatype) datatype, (MPI_Count *) count); +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Test, MPI_Request* request, int* flag, MPI_Status* status) PMPI_IMPL(int, MPI_Wait, MPI_Request* request, MPI_Status* status) @@ -448,10 +474,11 @@ PMPI_IMPL(int, MPI_Testall, int count, MPI_Request array_of_requests[], int *flag, MPI_Status *array_of_statuses) PMPI_IMPL(int, MPI_Testany, int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status); +#if defined(MPICH) PMPI_IMPL(int, MPI_Get_elements, const MPI_Status *status, MPI_Datatype datatype, int *count) PMPI_IMPL(int, MPI_Get_elements_x, const MPI_Status *status, MPI_Datatype datatype, MPI_Count *count) PMPI_IMPL(int, MPI_Request_get_status, MPI_Request request, int* flag, MPI_Status *status) - +#endif // defined(MPICH) From 901749348a30424a74507a4b2d5d76c09d2b052d Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 22:43:07 -0400 Subject: [PATCH 47/49] Conditional type wrappers --- .../mpi-wrappers/mpi_type_wrappers.cpp | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp index be600541c..26c35e6e7 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_type_wrappers.cpp @@ -72,9 +72,6 @@ USER_DEFINED_WRAPPER(int, Type_commit, (MPI_Datatype *) type) RETURN_TO_UPPER_HALF(); if (retval != MPI_SUCCESS) { realType = REMOVE_OLD_TYPE(*type); - } else { - if (mana_state != RESTART_REPLAY) { - } } DMTCP_PLUGIN_ENABLE_CKPT(); return retval; @@ -97,6 +94,7 @@ USER_DEFINED_WRAPPER(int, Type_contiguous, (int) count, (MPI_Datatype) oldtype, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Type_hvector, (int) count, (int) blocklength, (MPI_Aint) stride, (MPI_Datatype) oldtype, (MPI_Datatype*) newtype) @@ -116,14 +114,33 @@ USER_DEFINED_WRAPPER(int, Type_hvector, (int) count, (int) blocklength, return retval; } -#ifndef OPEN_MPI + USER_DEFINED_WRAPPER(int, Type_create_hvector, (int) count, (int) blocklength, (MPI_Aint) stride, (MPI_Datatype) oldtype, (MPI_Datatype*) newtype) { return MPI_Type_hvector(count, blocklength, stride, oldtype, newtype); } -#endif +#else // defined(MPICH) + USER_DEFINED_WRAPPER(int, Type_create_hvector, (int) count, (int) blocklength, + (MPI_Aint) stride, (MPI_Datatype) oldtype, + (MPI_Datatype*) newtype) +{ + int retval; + DMTCP_PLUGIN_DISABLE_CKPT(); + MPI_Datatype realType = VIRTUAL_TO_REAL_TYPE(oldtype); + JUMP_TO_LOWER_HALF(lh_info.fsaddr); + retval = NEXT_FUNC(Type_create_hvector)(count, blocklength, + stride, realType, newtype); + RETURN_TO_UPPER_HALF(); + if (retval == MPI_SUCCESS && mana_state != RESTART_REPLAY) { + MPI_Datatype virtType = ADD_NEW_TYPE(*newtype); + *newtype = virtType; + } + DMTCP_PLUGIN_ENABLE_CKPT(); + return retval; +} +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Type_vector, (int) count, (int) blocklength, (int) stride, (MPI_Datatype) oldtype, @@ -145,7 +162,6 @@ USER_DEFINED_WRAPPER(int, Type_vector, (int) count, (int) blocklength, return retval; } - // int MPI_Type_create_struct(int count, // const int array_of_blocklengths[], // const MPI_Aint array_of_displacements[], @@ -180,7 +196,7 @@ USER_DEFINED_WRAPPER(int, Type_create_struct, (int) count, return retval; } -#ifndef OPEN_MPI +#if defined(MPICH) // Perlmutter cray_mpich both implement MPI 3.1. However, they use different // APIs. We use MPICH_NUMVERSION (3.4a2) to differentiate the cray-mpich on Cori // and Perlmuttter. This ad-hoc workaround should be removed once the cray-mpich @@ -201,10 +217,10 @@ USER_DEFINED_WRAPPER(int, Type_struct, (int) count, array_of_displacements, array_of_types, newtype ); } -#endif // ifndef OPEN_MPI +#endif // if defined(MPICH) -#ifndef OPEN_MPI +#if defined(MPICH) #if MPICH_NUMVERSION < MPICH_CALC_VERSION(3,4,0,0,2) && defined(CRAY_MPICH_VERSION) USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, (const int*) array_of_blocklengths, @@ -235,9 +251,9 @@ USER_DEFINED_WRAPPER(int, Type_hindexed, (int) count, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } -#endif // ifndef OPEN_MPI +#endif // if defined(MPICH) -#ifndef OPEN_MPI +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Type_create_hindexed, (int) count, (const int*) array_of_blocklengths, (const MPI_Aint*) array_of_displacements, @@ -258,9 +274,9 @@ USER_DEFINED_WRAPPER(int, Type_create_hindexed, (int) count, return ret; #endif } +#endif // if defined(MPICH) -#endif // ifndef OPEN_MPI - +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Type_create_hindexed_block, (int) count, (int) blocklength, (const MPI_Aint*) array_of_displacements, @@ -282,6 +298,7 @@ USER_DEFINED_WRAPPER(int, Type_create_hindexed_block, (int) count, return ret; #endif } +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Type_hindexed_block, (int) count, (int) blocklength, @@ -316,6 +333,7 @@ USER_DEFINED_WRAPPER(int, Type_indexed, (int) count, return retval; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Type_dup, (MPI_Datatype) oldtype, (MPI_Datatype*) newtype) { @@ -332,6 +350,7 @@ USER_DEFINED_WRAPPER(int, Type_dup, (MPI_Datatype) oldtype, DMTCP_PLUGIN_ENABLE_CKPT(); return retval; } +#endif // defined(MPICH) USER_DEFINED_WRAPPER(int, Type_create_resized, (MPI_Datatype) oldtype, (MPI_Aint) lb, (MPI_Aint) extent, (MPI_Datatype*) newtype) @@ -401,19 +420,19 @@ PMPI_IMPL(int, MPI_Type_commit, MPI_Datatype *type) PMPI_IMPL(int, MPI_Type_contiguous, int count, MPI_Datatype oldtype, MPI_Datatype *newtype) PMPI_IMPL(int, MPI_Type_free, MPI_Datatype *type) +#if defined(MPICH) PMPI_IMPL(int, MPI_Type_vector, int count, int blocklength, int stride, MPI_Datatype oldtype, MPI_Datatype *newtype) -#ifndef OPEN_MPI PMPI_IMPL(int, MPI_Type_hvector, int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype) -#endif +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Type_create_hvector, int count, int blocklength, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype) PMPI_IMPL(int, MPI_Type_create_struct, int count, const int array_of_blocklengths[], const MPI_Aint array_of_displacements[], const MPI_Datatype array_of_types[], MPI_Datatype *newtype) -#ifndef OPEN_MPI +#if defined(MPICH) #if MPICH_NUMVERSION < MPICH_CALC_VERSION(3,4,0,0,2) && defined(CRAY_MPICH_VERSION) PMPI_IMPL(int, MPI_Type_struct, int count, const int array_of_blocklengths[], const MPI_Aint array_of_displacements[], const MPI_Datatype array_of_types[], @@ -429,7 +448,7 @@ PMPI_IMPL(int, MPI_Type_hindexed, int count, int array_of_blocklengths[], MPI_Aint array_of_displacements[], MPI_Datatype oldtype, MPI_Datatype *newtype); #endif -#endif // endif OPEN_MPI +#endif // endif defined(MPICH) PMPI_IMPL(int, MPI_Type_size_x, MPI_Datatype type, MPI_Count *size) PMPI_IMPL(int, MPI_Type_indexed, int count, const int array_of_blocklengths[], @@ -443,7 +462,9 @@ PMPI_IMPL(int, MPI_Pack, const void *inbuf, int incount, MPI_Datatype datatype, void *outbuf, int outsize, int *position, MPI_Comm comm) PMPI_IMPL(int, MPI_Type_create_resized, MPI_Datatype oldtype, MPI_Aint lb, MPI_Aint extent, MPI_Datatype *newtype); +#if defined(MPICH) PMPI_IMPL(int, MPI_Type_dup, MPI_Datatype type, MPI_Datatype *newtype); +#endif PMPI_IMPL(int, MPI_Type_create_hindexed, int count, const int array_of_blocklengths[], From 5ef4a46360107fc9aad77ad2903d2d7c46c8eb1e Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 23:30:25 -0400 Subject: [PATCH 48/49] Conditional general wrappers --- mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp index ed3e4c2b3..588644e05 100644 --- a/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp +++ b/mpi-proxy-split/mpi-wrappers/mpi_wrappers.cpp @@ -140,9 +140,15 @@ USER_DEFINED_WRAPPER(int, Finalize, (void)) return MPI_SUCCESS; } +#if defined(MPICH) USER_DEFINED_WRAPPER(int, Get_count, (const MPI_Status *) status, (MPI_Datatype) datatype, (int *) count) +#else +USER_DEFINED_WRAPPER(int, Get_count, + (MPI_Status *) status, (MPI_Datatype) datatype, + (int *) count) +#endif // defined(MPICH) { int retval; DMTCP_PLUGIN_DISABLE_CKPT(); @@ -213,7 +219,11 @@ PMPI_IMPL(double, MPI_Wtime, void) PMPI_IMPL(int, MPI_Initialized, int *flag) PMPI_IMPL(int, MPI_Init_thread, int *argc, char ***argv, int required, int *provided) +#if defined(MPICH) PMPI_IMPL(int, MPI_Get_count, const MPI_Status *status, MPI_Datatype datatype, int *count) +#else +PMPI_IMPL(int, MPI_Get_count, MPI_Status *status, MPI_Datatype datatype, +#endif // defined(MPICH) PMPI_IMPL(int, MPI_Get_library_version, char *version, int *resultlen) PMPI_IMPL(int, MPI_Get_address, const void *location, MPI_Aint *address) From 71caad2efb8635bc024150f5594f7943a9622e71 Mon Sep 17 00:00:00 2001 From: Leonid Belyaev Date: Thu, 21 Sep 2023 23:36:34 -0400 Subject: [PATCH 49/49] Conditional mpi_plugin --- mpi-proxy-split/mpi_plugin.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mpi-proxy-split/mpi_plugin.cpp b/mpi-proxy-split/mpi_plugin.cpp index f7863831d..4b2d5d0ec 100644 --- a/mpi-proxy-split/mpi_plugin.cpp +++ b/mpi-proxy-split/mpi_plugin.cpp @@ -71,7 +71,9 @@ extern CartesianProperties g_cartesian_properties; #endif extern ManaHeader g_mana_header; +#if defined(MPICH) extern std::unordered_map g_params_map; +#endif // defined(MPICH) constexpr const char *MANA_FILE_REGEX_ENV = "MANA_FILE_REGEX"; constexpr const char *MANA_SEGV_DEBUG_LOOP = "MANA_SEGV_DEBUG_LOOP"; @@ -880,6 +882,7 @@ save_mana_header(const char *filename) close(fd); } +#if defined(MPICH) const char * get_mpi_file_filename() { @@ -985,6 +988,7 @@ restore_mpi_files(const char *filename) } } +#endif // defined(MPICH) #ifdef SINGLE_CART_REORDER const char * @@ -1183,8 +1187,10 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data) dmtcp_local_barrier("MPI:p2p_log_replay.cpp-void"); seq_num_reset(RESTART); dmtcp_local_barrier("MPI:seq_num_reset"); +#if defined(MPICH) const char *file = get_mpi_file_filename(); restore_mpi_files(file); +#endif // defined(MPICH) dmtcp_local_barrier("MPI:Restore-MPI-Files"); mana_state = RUNNING; break;