From 2dfd8c8687a610a21c488f73e2105645647226bb Mon Sep 17 00:00:00 2001 From: adityakpandare Date: Tue, 10 Sep 2024 16:11:35 -0600 Subject: [PATCH 1/6] do not save checkpoint for m2mtransfer objects; instead reinitialize them at restart --- src/Inciter/Discretization.cpp | 9 +++++++++ src/Inciter/Discretization.hpp | 3 +++ src/Inciter/OversetFE.cpp | 12 +++++++++++- src/Transfer/M2MTransfer.hpp | 3 +++ src/Transfer/TransferDetails.hpp | 13 ------------- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/Inciter/Discretization.cpp b/src/Inciter/Discretization.cpp index 26dabce14cf..f7334c2a0b6 100644 --- a/src/Inciter/Discretization.cpp +++ b/src/Inciter/Discretization.cpp @@ -166,6 +166,15 @@ Discretization::Discretization( } // Register mesh with mesh-transfer lib + addMesh(); +} + +void +Discretization::addMesh() +// ***************************************************************************** +// Register mesh with mesh-transfer lib +// ***************************************************************************** +{ if (m_disc.size() == 1 || m_transfer.empty()) { // skip transfer if single mesh or if not involved in coupling transferInit(); diff --git a/src/Inciter/Discretization.hpp b/src/Inciter/Discretization.hpp index 1d365b5383d..1f9c79c5565 100644 --- a/src/Inciter/Discretization.hpp +++ b/src/Inciter/Discretization.hpp @@ -92,6 +92,9 @@ class Discretization : public CBase_Discretization { //! Configure Charm++ reduction types static void registerReducers(); + //! Register mesh with mesh-transfer lib + void addMesh(); + //! Start computing new mesh veloctity for ALE mesh motion void meshvelStart( const tk::UnsMesh::Coords vel, diff --git a/src/Inciter/OversetFE.cpp b/src/Inciter/OversetFE.cpp index a6efa9e067e..907d5752ad9 100644 --- a/src/Inciter/OversetFE.cpp +++ b/src/Inciter/OversetFE.cpp @@ -32,6 +32,9 @@ #include "Around.hpp" #include "CGPDE.hpp" #include "FieldOutput.hpp" +#include "M2MTransfer.hpp" + +extern CollideHandle collideHandle; namespace inciter { @@ -1415,7 +1418,14 @@ OversetFE::evalLB( int nrestart ) // Detect if just returned from a checkpoint and if so, zero timers and // finished flag - if (d->restarted( nrestart )) m_finished = 0; + if (d->restarted( nrestart )) { + //// TODO: Need to make sure this is actually correct + //CollideGrid3d gridMap(CkVector3d(0, 0, 0),CkVector3d(2, 100, 2)); + //collideHandle = CollideCreate(gridMap, + // CollideSerialClient(exam2m::collisionHandler, 0)); + //d->addMesh(); + m_finished = 0; + } const auto lbfreq = g_inputdeck.get< tag::cmd, tag::lbfreq >(); const auto nonblocking = g_inputdeck.get< tag::cmd, tag::nonblocking >(); diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp index 29397447970..3ef83f401d3 100644 --- a/src/Transfer/M2MTransfer.hpp +++ b/src/Transfer/M2MTransfer.hpp @@ -7,6 +7,9 @@ namespace exam2m { +void collisionHandler( [[maybe_unused]] void *param, + int nColl, + Collision *colls ); void addMesh(CkArrayID p, int elem, CkCallback cb); void setSourceTets(CkArrayID p, int index, std::vector< std::size_t >* inpoel, tk::UnsMesh::Coords* coords, const tk::Fields& u); void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fields& u, CkCallback cb); diff --git a/src/Transfer/TransferDetails.hpp b/src/Transfer/TransferDetails.hpp index e8f5c6091af..0d5cfc3874f 100644 --- a/src/Transfer/TransferDetails.hpp +++ b/src/Transfer/TransferDetails.hpp @@ -80,19 +80,6 @@ class TransferDetails : public CBase_TransferDetails { //! Transfer the interpolated solution data back to destination mesh void transferSolution( const std::vector< SolutionData >& soln ); - /** @name Charm++ pack/unpack serializer member functions */ - ///@{ - //! \brief Pack/Unpack serialize member function - //! \param[in,out] p Charm++'s PUP::er serializer object reference - void pup( PUP::er &p ) override { - p | m_firstchunk; - } - //! \brief Pack/Unpack serialize operator| - //! \param[in,out] p Charm++'s PUP::er serializer object reference - //! \param[in,out] i TransferDetails object reference - friend void operator|( PUP::er& p, TransferDetails& i ) { i.pup(p); } - //@} - private: //! The ID of my first chunk (used for collision detection library) int m_firstchunk; From 973ebfdf4e340c86a01bc7b850ab530e36d74d71 Mon Sep 17 00:00:00 2001 From: adityakpandare Date: Tue, 10 Sep 2024 16:47:38 -0600 Subject: [PATCH 2/6] added migration constructor for LibMain, and call Disc()->addMesh() from OversetFE on restart --- src/Inciter/Discretization.hpp | 1 + src/Inciter/OversetFE.cpp | 9 +-------- src/Transfer/M2MTransfer.hpp | 7 +++++++ src/Transfer/m2mtransfer.ci | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Inciter/Discretization.hpp b/src/Inciter/Discretization.hpp index 1f9c79c5565..9cde229caad 100644 --- a/src/Inciter/Discretization.hpp +++ b/src/Inciter/Discretization.hpp @@ -24,6 +24,7 @@ #include "CommMap.hpp" #include "History.hpp" #include "Inciter/InputDeck/InputDeck.hpp" +#include "M2MTransfer.hpp" #include "NoWarning/discretization.decl.h" #include "NoWarning/refiner.decl.h" diff --git a/src/Inciter/OversetFE.cpp b/src/Inciter/OversetFE.cpp index 907d5752ad9..f7245f34372 100644 --- a/src/Inciter/OversetFE.cpp +++ b/src/Inciter/OversetFE.cpp @@ -32,9 +32,6 @@ #include "Around.hpp" #include "CGPDE.hpp" #include "FieldOutput.hpp" -#include "M2MTransfer.hpp" - -extern CollideHandle collideHandle; namespace inciter { @@ -1419,11 +1416,7 @@ OversetFE::evalLB( int nrestart ) // Detect if just returned from a checkpoint and if so, zero timers and // finished flag if (d->restarted( nrestart )) { - //// TODO: Need to make sure this is actually correct - //CollideGrid3d gridMap(CkVector3d(0, 0, 0),CkVector3d(2, 100, 2)); - //collideHandle = CollideCreate(gridMap, - // CollideSerialClient(exam2m::collisionHandler, 0)); - //d->addMesh(); + d->addMesh(); m_finished = 0; } diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp index 3ef83f401d3..a197bcb436d 100644 --- a/src/Transfer/M2MTransfer.hpp +++ b/src/Transfer/M2MTransfer.hpp @@ -1,4 +1,6 @@ // Controller for the library +#ifndef M2MTransfer_hpp +#define M2MTransfer_hpp #include "NoWarning/m2mtransfer.decl.h" @@ -17,6 +19,9 @@ void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fiel class LibMain : public CBase_LibMain { public: LibMain(CkArgMsg* msg); + explicit LibMain(CkMigrateMessage* msg) : CBase_LibMain(msg) {} + void pup(PUP::er&) {} + friend void operator|( PUP::er& p, LibMain& m ) { m.pup(p); } }; class MeshData { @@ -57,3 +62,5 @@ class M2MTransfer : public CBase_M2MTransfer { }; } + +#endif // M2MTransfer_hpp diff --git a/src/Transfer/m2mtransfer.ci b/src/Transfer/m2mtransfer.ci index b9f9b4cee13..24a821ec4f1 100644 --- a/src/Transfer/m2mtransfer.ci +++ b/src/Transfer/m2mtransfer.ci @@ -9,7 +9,7 @@ module m2mtransfer { class MeshData; - mainchare LibMain { + mainchare [migratable] LibMain { entry LibMain(CkArgMsg* msg); }; From 6fcff4a2dba5471399605b52e7531fd1937e8954 Mon Sep 17 00:00:00 2001 From: adityakpandare Date: Wed, 18 Sep 2024 16:23:27 -0600 Subject: [PATCH 3/6] correctly reinitialize the collide-lib's state on restart. Goes with appropriate changes in charm-collide: https://github.com/adityakpandare/charm/tree/migratable-collidev701 --- src/Inciter/Discretization.cpp | 34 +++++++++++++++++++++++++------- src/Inciter/Discretization.hpp | 3 +-- src/Inciter/OversetFE.cpp | 23 +++++++++++++++++++-- src/Inciter/OversetFE.hpp | 3 +++ src/Inciter/oversetfe.ci | 1 + src/Transfer/M2MTransfer.cpp | 11 +++++++++++ src/Transfer/M2MTransfer.hpp | 17 +++++++++++++++- src/Transfer/TransferDetails.cpp | 9 +++++++++ 8 files changed, 89 insertions(+), 12 deletions(-) diff --git a/src/Inciter/Discretization.cpp b/src/Inciter/Discretization.cpp index f7334c2a0b6..4f646a12f1a 100644 --- a/src/Inciter/Discretization.cpp +++ b/src/Inciter/Discretization.cpp @@ -35,6 +35,12 @@ extern ctr::InputDeck g_inputdeck_defaults; } // inciter:: +namespace exam2m { + +extern CollideHandle collideHandle; + +} // exam2m:: + using inciter::Discretization; Discretization::Discretization( @@ -166,23 +172,34 @@ Discretization::Discretization( } // Register mesh with mesh-transfer lib - addMesh(); + if (m_disc.size() == 1 || m_transfer.empty()) { + // skip transfer if single mesh or if not involved in coupling + transferInit(); + } else { + if (thisIndex == 0) { + exam2m::addMesh( thisProxy, m_nchare, + CkCallback( CkIndex_Discretization::transferInit(), thisProxy ) ); + std::cout << "Disc: " << m_meshid << " called addMesh(). \n"; + } + } } void -Discretization::addMesh() +Discretization::addRestartedMesh( CkCallback cb ) // ***************************************************************************** -// Register mesh with mesh-transfer lib +// Register mesh with mesh-transfer lib on restart +//! \param[in] cb Callback to call when mesh-registration is complete. // ***************************************************************************** { if (m_disc.size() == 1 || m_transfer.empty()) { // skip transfer if single mesh or if not involved in coupling - transferInit(); + cb.send(); } else { + CollideSerialClientRestart(exam2m::collideHandle, exam2m::collisionHandler, + 0); if (thisIndex == 0) { - exam2m::addMesh( thisProxy, m_nchare, - CkCallback( CkIndex_Discretization::transferInit(), thisProxy ) ); - //std::cout << "Disc: " << m_meshid << " m2m::addMesh()\n"; + exam2m::addMesh( thisProxy, m_nchare, cb ); + std::cout << "Disc: on restart " << m_meshid << " called addMesh(). \n"; } } } @@ -209,6 +226,7 @@ Discretization::transferInit() // coupled to other solver) // ***************************************************************************** { + std::cout << "Disc: " << m_meshid << " completed addMesh(). \n"; // Compute number of mesh points owned std::size_t npoin = m_gid.size(); for (auto g : m_gid) if (tk::slave(m_nodeCommMap,g,thisIndex)) --npoin; @@ -350,6 +368,7 @@ Discretization::transfer( // Pass source and destination meshes to mesh transfer lib (if coupled) Assert( m_nsrc < m_mytransfer.size(), "Indexing out of mytransfer[src]" ); if (fromMesh == m_meshid) { + std::cout << "Disc: " << m_meshid << " setting source tets. \n"; exam2m::setSourceTets( thisProxy, thisIndex, &m_inpoel, &m_coord, u ); ++m_nsrc; } else { @@ -357,6 +376,7 @@ Discretization::transfer( } Assert( m_ndst < m_mytransfer.size(), "Indexing out of mytransfer[dst]" ); if (toMesh == m_meshid) { + std::cout << "Disc: " << m_meshid << " setting destination pts. \n"; exam2m::setDestPoints( thisProxy, thisIndex, &m_coord, u, cb_xfer ); ++m_ndst; diff --git a/src/Inciter/Discretization.hpp b/src/Inciter/Discretization.hpp index 9cde229caad..f395250c2f8 100644 --- a/src/Inciter/Discretization.hpp +++ b/src/Inciter/Discretization.hpp @@ -24,7 +24,6 @@ #include "CommMap.hpp" #include "History.hpp" #include "Inciter/InputDeck/InputDeck.hpp" -#include "M2MTransfer.hpp" #include "NoWarning/discretization.decl.h" #include "NoWarning/refiner.decl.h" @@ -94,7 +93,7 @@ class Discretization : public CBase_Discretization { static void registerReducers(); //! Register mesh with mesh-transfer lib - void addMesh(); + void addRestartedMesh( CkCallback cb ); //! Start computing new mesh veloctity for ALE mesh motion void meshvelStart( diff --git a/src/Inciter/OversetFE.cpp b/src/Inciter/OversetFE.cpp index f7245f34372..a26095a5063 100644 --- a/src/Inciter/OversetFE.cpp +++ b/src/Inciter/OversetFE.cpp @@ -594,6 +594,9 @@ OversetFE::transferSol() // Transfer solution to other solver and mesh if coupled // ***************************************************************************** { + std::cout << "Overset " << Disc()->MeshId() + << " in transferSol(), m_ixfer: " << m_ixfer << "\n"; + // Set up transfer-flags for receiving mesh if (m_ixfer == 1) { applySolTransfer(0); @@ -601,6 +604,9 @@ OversetFE::transferSol() setTransferFlags(m_ixfer); ++m_ixfer; + std::cout << "Overset " << Disc()->MeshId() + << " transferflags set, m_ixfer: " << m_ixfer << "\n"; + // Initiate IC transfer (if coupled) Disc()->transfer( m_uc, m_ixfer-1, CkCallback(CkIndex_OversetFE::lhs(), thisProxy[thisIndex]) ); @@ -1407,7 +1413,7 @@ OversetFE::out() void OversetFE::evalLB( int nrestart ) // ***************************************************************************** -// Evaluate whether to do load balancing +// Begins to evaluate whether to do load balancing, but actually just adds mesh //! \param[in] nrestart Number of times restarted // ***************************************************************************** { @@ -1416,9 +1422,22 @@ OversetFE::evalLB( int nrestart ) // Detect if just returned from a checkpoint and if so, zero timers and // finished flag if (d->restarted( nrestart )) { - d->addMesh(); + d->addRestartedMesh( + CkCallback(CkIndex_OversetFE::continueEvalLB(), thisProxy) ); m_finished = 0; } + else + continueEvalLB(); +} + +void +OversetFE::continueEvalLB() +// ***************************************************************************** +// Continue evaluating whether to do load balancing and proceed to next step +// ***************************************************************************** +{ + auto d = Disc(); + std::cout << "Overset " << d->MeshId() << " in continueEvalLB. \n"; const auto lbfreq = g_inputdeck.get< tag::cmd, tag::lbfreq >(); const auto nonblocking = g_inputdeck.get< tag::cmd, tag::nonblocking >(); diff --git a/src/Inciter/OversetFE.hpp b/src/Inciter/OversetFE.hpp index 71a0da26fbb..5a9032eecee 100644 --- a/src/Inciter/OversetFE.hpp +++ b/src/Inciter/OversetFE.hpp @@ -157,6 +157,9 @@ class OversetFE : public CBase_OversetFE { // Evaluate whether to do load balancing void evalLB( int nrestart ); + // Continue evaluating whether to do load balancing and proceed to next step + void continueEvalLB(); + //! Evaluate whether to continue with next time step stage void stage(); diff --git a/src/Inciter/oversetfe.ci b/src/Inciter/oversetfe.ci index b5a7519c73d..b5c37e882a2 100644 --- a/src/Inciter/oversetfe.ci +++ b/src/Inciter/oversetfe.ci @@ -62,6 +62,7 @@ module oversetfe { entry void next(); entry void stage(); entry void evalLB( int nrestart ); + entry void continueEvalLB(); //! [Entry methods] // SDAG code follows. See http://charm.cs.illinois.edu/manuals/html/ diff --git a/src/Transfer/M2MTransfer.cpp b/src/Transfer/M2MTransfer.cpp index c73e69c72e2..fec12e4c071 100644 --- a/src/Transfer/M2MTransfer.cpp +++ b/src/Transfer/M2MTransfer.cpp @@ -4,6 +4,7 @@ #include "TransferDetails.hpp" #include +#include namespace exam2m { @@ -49,13 +50,22 @@ void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fiel } LibMain::LibMain(CkArgMsg* msg) { + std::cout << "LibMain() called..." << std::endl; delete msg; m2mtransferProxy = CProxy_M2MTransfer::ckNew(); + setProxies(); + std::cout << "LibMain() cmplt." << std::endl; +} +void LibMain::setProxies() +{ + std::cout << " in setProxies..." << std::endl; // TODO: Need to make sure this is actually correct CollideGrid3d gridMap(CkVector3d(0, 0, 0),CkVector3d(2, 100, 2)); + std::cout << " gridMap initialized..." << std::endl; collideHandle = CollideCreate(gridMap, CollideSerialClient(collisionHandler, 0)); + std::cout << " setProxies cmplt." << std::endl; } M2MTransfer::M2MTransfer() : current_chunk(0) {} @@ -72,6 +82,7 @@ void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb) { mesh.m_proxy = CProxy_TransferDetails::ckNew(p, mesh, cb, opts); proxyMap[id] = mesh; current_chunk += elem; + std::cout << "M2MTransfer::addMesh() cmplt. " << thisIndex << std::endl; } else { CkAbort("Uhoh...\n"); } diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp index a197bcb436d..11a8d499a1b 100644 --- a/src/Transfer/M2MTransfer.hpp +++ b/src/Transfer/M2MTransfer.hpp @@ -7,6 +7,8 @@ #include "collidecharm.h" #include "Fields.hpp" +#include + namespace exam2m { void collisionHandler( [[maybe_unused]] void *param, @@ -19,9 +21,18 @@ void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fiel class LibMain : public CBase_LibMain { public: LibMain(CkArgMsg* msg); - explicit LibMain(CkMigrateMessage* msg) : CBase_LibMain(msg) {} + explicit LibMain(CkMigrateMessage* msg) : CBase_LibMain(msg) { + std::cout << "LibMain() migrate ctor called..." << std::endl; + + //setProxies(); + + std::cout << "LibMain() migrate ctor cmplt." << std::endl; + } void pup(PUP::er&) {} friend void operator|( PUP::er& p, LibMain& m ) { m.pup(p); } + +private: + void setProxies(); }; class MeshData { @@ -59,6 +70,10 @@ class M2MTransfer : public CBase_M2MTransfer { void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fields& u, CkCallback cb); void distributeCollisions(int nColl, Collision* colls); + + void pup(PUP::er& p) { + } + friend void operator|( PUP::er& p, M2MTransfer& m ) { m.pup(p); } }; } diff --git a/src/Transfer/TransferDetails.cpp b/src/Transfer/TransferDetails.cpp index b26f190e817..7f840bd5304 100644 --- a/src/Transfer/TransferDetails.cpp +++ b/src/Transfer/TransferDetails.cpp @@ -16,6 +16,7 @@ #include "M2MTransfer.hpp" #include "collidecharm.h" +#include #if defined(__clang__) #pragma clang diagnostic push @@ -43,10 +44,12 @@ TransferDetails::TransferDetails( CkArrayID p, MeshData d, CkCallback cb ) : //! \param[in] cb Callback to inform application that the library is ready // ***************************************************************************** { + std::cout << "TransferDetails() ctor beginning... " << thisIndex << std::endl; CollideRegister(collideHandle, m_firstchunk + thisIndex); d.m_proxy = thisProxy; m2mtransferProxy.ckLocalBranch()->setMesh( p, d ); contribute(cb); + std::cout << "TransferDetails() ctor complete. " << thisIndex << std::endl; } void @@ -123,6 +126,9 @@ TransferDetails::collideVertices() prio[nBoxes] = firstchunk; ++nBoxes; } + + std::cout << "colliding vertices " << nBoxes << std::endl; + CollideBoxesPrio( collideHandle, firstchunk + thisIndex, static_cast(nBoxes), boxes.data(), prio.data() ); } @@ -149,6 +155,9 @@ TransferDetails::collideTets() const boxes[i].add(CkVector3d(coord[0][p], coord[1][p], coord[2][p])); } } + + std::cout << "colliding tets " << nBoxes << std::endl; + CollideBoxesPrio( collideHandle, firstchunk + thisIndex, static_cast(nBoxes), boxes.data(), prio.data() ); } From a95be4a4e4f23f7a3e0193213606d338e2261f27 Mon Sep 17 00:00:00 2001 From: adityakpandare Date: Thu, 19 Sep 2024 08:38:51 -0600 Subject: [PATCH 4/6] removed some unnecessary changes --- src/Transfer/M2MTransfer.cpp | 9 +-------- src/Transfer/M2MTransfer.hpp | 7 ------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Transfer/M2MTransfer.cpp b/src/Transfer/M2MTransfer.cpp index fec12e4c071..fb1c7826923 100644 --- a/src/Transfer/M2MTransfer.cpp +++ b/src/Transfer/M2MTransfer.cpp @@ -53,19 +53,12 @@ LibMain::LibMain(CkArgMsg* msg) { std::cout << "LibMain() called..." << std::endl; delete msg; m2mtransferProxy = CProxy_M2MTransfer::ckNew(); - setProxies(); - std::cout << "LibMain() cmplt." << std::endl; -} -void LibMain::setProxies() -{ - std::cout << " in setProxies..." << std::endl; // TODO: Need to make sure this is actually correct CollideGrid3d gridMap(CkVector3d(0, 0, 0),CkVector3d(2, 100, 2)); - std::cout << " gridMap initialized..." << std::endl; collideHandle = CollideCreate(gridMap, CollideSerialClient(collisionHandler, 0)); - std::cout << " setProxies cmplt." << std::endl; + std::cout << "LibMain() cmplt." << std::endl; } M2MTransfer::M2MTransfer() : current_chunk(0) {} diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp index 11a8d499a1b..81300162c6b 100644 --- a/src/Transfer/M2MTransfer.hpp +++ b/src/Transfer/M2MTransfer.hpp @@ -22,17 +22,10 @@ class LibMain : public CBase_LibMain { public: LibMain(CkArgMsg* msg); explicit LibMain(CkMigrateMessage* msg) : CBase_LibMain(msg) { - std::cout << "LibMain() migrate ctor called..." << std::endl; - - //setProxies(); - std::cout << "LibMain() migrate ctor cmplt." << std::endl; } void pup(PUP::er&) {} friend void operator|( PUP::er& p, LibMain& m ) { m.pup(p); } - -private: - void setProxies(); }; class MeshData { From c7acd723ee15496ce368cee5308bfad25c0aa54b Mon Sep 17 00:00:00 2001 From: adityakpandare Date: Thu, 19 Sep 2024 12:02:14 -0600 Subject: [PATCH 5/6] some comments in m2mtransfer --- src/Transfer/M2MTransfer.cpp | 11 ++++++++++- src/Transfer/M2MTransfer.hpp | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/Transfer/M2MTransfer.cpp b/src/Transfer/M2MTransfer.cpp index fb1c7826923..836e3e1c637 100644 --- a/src/Transfer/M2MTransfer.cpp +++ b/src/Transfer/M2MTransfer.cpp @@ -63,7 +63,16 @@ LibMain::LibMain(CkArgMsg* msg) { M2MTransfer::M2MTransfer() : current_chunk(0) {} -void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb) { +void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb) +// ***************************************************************************** +// Register mesh with the mesh-to-mesh transfer library +//! \param[in] p Proxy from which this function call originated +//! \param[in] elem Total number of chares in the application +//! \param[in] cb Callback to inform application that the library is ready +//! \details This function registers a mesh with M2MTransfer. This needs to +//! be called during normal execution and when restarting from checkpoint. +// ***************************************************************************** +{ auto id = static_cast(CkGroupID(p).idx); if (proxyMap.count(id) == 0) { CkArrayOptions opts; diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp index 81300162c6b..a1aa26d3f94 100644 --- a/src/Transfer/M2MTransfer.hpp +++ b/src/Transfer/M2MTransfer.hpp @@ -11,6 +11,7 @@ namespace exam2m { +//! External user interface functions to M2MTransfer void collisionHandler( [[maybe_unused]] void *param, int nColl, Collision *colls ); @@ -18,6 +19,7 @@ void addMesh(CkArrayID p, int elem, CkCallback cb); void setSourceTets(CkArrayID p, int index, std::vector< std::size_t >* inpoel, tk::UnsMesh::Coords* coords, const tk::Fields& u); void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fields& u, CkCallback cb); +//! LibMain mainchare that creates collidecharm-proxies at startup class LibMain : public CBase_LibMain { public: LibMain(CkArgMsg* msg); @@ -28,6 +30,7 @@ class LibMain : public CBase_LibMain { friend void operator|( PUP::er& p, LibMain& m ) { m.pup(p); } }; +//! MeshData class that contains the mesh class MeshData { public: CProxy_TransferDetails m_proxy; @@ -40,6 +43,7 @@ class MeshData { } }; +//! M2MTransfer chare-group which is inciter's interface to collidecharm class M2MTransfer : public CBase_M2MTransfer { private: std::unordered_map proxyMap; @@ -47,16 +51,23 @@ class M2MTransfer : public CBase_M2MTransfer { CmiUInt8 m_sourcemesh, m_destmesh; public: + + //! Constructor M2MTransfer(); + #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wundefined-func-template" #endif + //! Migrate constructor explicit M2MTransfer( CkMigrateMessage* m ) : CBase_M2MTransfer( m ) {} #if defined(__clang__) #pragma clang diagnostic pop #endif + + //! Register mesh with the mesh-to-mesh transfer library void addMesh(CkArrayID p, int elem, CkCallback cb); + void setMesh(CkArrayID p, MeshData d); void setSourceTets(CkArrayID p, int index, std::vector< std::size_t >* inpoel, tk::UnsMesh::Coords* coords, const tk::Fields& u); @@ -64,8 +75,9 @@ class M2MTransfer : public CBase_M2MTransfer { tk::Fields& u, CkCallback cb); void distributeCollisions(int nColl, Collision* colls); - void pup(PUP::er& p) { - } + //! Pack/Unpack serialize member function for Charm + void pup(PUP::er&) {} + //! Pack/Unpack serialize operator| for Charm friend void operator|( PUP::er& p, M2MTransfer& m ) { m.pup(p); } }; From b53dbb0915d99fb74a48cef7b62caeabf5bab0c2 Mon Sep 17 00:00:00 2001 From: Advait Tahilyani Date: Thu, 2 Apr 2026 18:42:01 -0500 Subject: [PATCH 6/6] Initial Changes --- src/Control/Inciter/CmdLine/CmdLine.hpp | 24 +++++++++++++++++++++++- src/Inciter/Discretization.cpp | 25 +++++++++++++++++++++---- src/Inciter/Discretization.hpp | 5 +++++ src/Inciter/discretization.ci | 1 + src/Transfer/M2MTransfer.cpp | 1 + 5 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/Control/Inciter/CmdLine/CmdLine.hpp b/src/Control/Inciter/CmdLine/CmdLine.hpp index 0c0ef976e1e..b2f804cc6c3 100644 --- a/src/Control/Inciter/CmdLine/CmdLine.hpp +++ b/src/Control/Inciter/CmdLine/CmdLine.hpp @@ -169,7 +169,29 @@ class CmdLine : public tk::TaggedTuple< CmdLineMembers > { ///@{ //! \brief Pack/Unpack serialize member function //! \param[in,out] p Charm++'s PUP::er serializer object reference - void pup( PUP::er& p ) { tk::TaggedTuple< CmdLineMembers >::pup(p); } + //! \details cmdinfo, ctrinfo, and helpkw are intentionally skipped: they + //! are always reconstructed identically by the constructors from + //! compile-time string literals and must not be checkpointed, as their + //! large size (MB of help text) can corrupt the PUP stream offset. + void pup( PUP::er& p ) { + p | get< tag::io >(); + p | get< tag::virtualization >(); + p | get< tag::verbose >(); + p | get< tag::chare >(); + p | get< tag::nonblocking >(); + p | get< tag::benchmark >(); + p | get< tag::feedback >(); + p | get< tag::help >(); + p | get< tag::helpctr >(); + p | get< tag::quiescence >(); + p | get< tag::trace >(); + p | get< tag::version >(); + p | get< tag::license >(); + p | get< tag::error >(); + p | get< tag::lbfreq >(); + p | get< tag::rsfreq >(); + // tag::cmdinfo, tag::ctrinfo, tag::helpkw intentionally not PUP'd + } //! \brief Pack/Unpack serialize operator| //! \param[in,out] p Charm++'s PUP::er serializer object reference //! \param[in,out] c CmdLine object reference diff --git a/src/Inciter/Discretization.cpp b/src/Inciter/Discretization.cpp index ce19d8636ad..f8017598071 100644 --- a/src/Inciter/Discretization.cpp +++ b/src/Inciter/Discretization.cpp @@ -197,17 +197,34 @@ Discretization::addRestartedMesh( CkCallback cb ) // skip transfer if single mesh or if not involved in coupling cb.send(); } else { + // Store the callback so collideRestartDone() (element 0) can forward it + // to addMesh once all PEs have finished reinitClient. + m_restartcb = cb; + // Reinitialize the collision client on this PE's collideMgr branch. + // Contribute to a barrier so ALL PEs finish reinitClient before element 0 + // calls addMesh. Without this, collideMgr on some PEs may not yet be + // restored from checkpoint when the first collision messages arrive, + // causing "group proxy not initialized" (CmiAbort / SIGSEGV on restart). CollideSerialClientRestart(exam2m::collideHandle, exam2m::collisionHandler, 0); - if (thisIndex == 0) { - exam2m::addMesh( thisProxy, m_nchare, cb ); - std::cout << "Disc: on restart " << m_meshid << " called addMesh(). \n"; - } + contribute( CkCallback(CkIndex_Discretization::collideRestartDone(), thisProxy) ); } // Array elements must not use the chare_objs table chareIdx = -1; } +void +Discretization::collideRestartDone() +// ***************************************************************************** +// Called on element 0 once every chare has finished CollideSerialClientRestart +// ***************************************************************************** +{ + if (thisIndex == 0) { + std::cout << "Disc: on restart " << m_meshid << " called addMesh(). \n"; + exam2m::addMesh( thisProxy, m_nchare, m_restartcb ); + } +} + std::unordered_map< std::size_t, std::size_t > Discretization::genBid() // ***************************************************************************** diff --git a/src/Inciter/Discretization.hpp b/src/Inciter/Discretization.hpp index d12f9f89751..c965022dfc0 100644 --- a/src/Inciter/Discretization.hpp +++ b/src/Inciter/Discretization.hpp @@ -96,6 +96,9 @@ class Discretization : public CBase_Discretization { //! Register mesh with mesh-transfer lib void addRestartedMesh( CkCallback cb ); + //! Barrier target: all PEs done with CollideSerialClientRestart + void collideRestartDone(); + //! Start computing new mesh veloctity for ALE mesh motion void meshvelStart( const tk::UnsMesh::Coords vel, @@ -517,6 +520,8 @@ class Discretization : public CBase_Discretization { //! \brief Charm++ callback of the function to call after a mesh-to-mesh //! solution transfer (to-and-fro) is complete CkCallback m_transfer_complete; + //! Callback stored during addRestartedMesh, forwarded by collideRestartDone + CkCallback m_restartcb; //! Solution/mesh transfer (coupling) information coordination propagation //! \details This has the same size with the same src/dst information on //! all solvers. diff --git a/src/Inciter/discretization.ci b/src/Inciter/discretization.ci index 755f4f7b0d7..e85c7aa8ed9 100644 --- a/src/Inciter/discretization.ci +++ b/src/Inciter/discretization.ci @@ -47,6 +47,7 @@ module discretization { const std::vector< tk::real >& nodevol ); entry void stat( tk::real mesh_volume ); entry void transferInit(); + entry void collideRestartDone(); entry void transfer_complete(); entry void to_complete(); entry void from_complete(); diff --git a/src/Transfer/M2MTransfer.cpp b/src/Transfer/M2MTransfer.cpp index 836e3e1c637..20e1fbe3414 100644 --- a/src/Transfer/M2MTransfer.cpp +++ b/src/Transfer/M2MTransfer.cpp @@ -61,6 +61,7 @@ LibMain::LibMain(CkArgMsg* msg) { std::cout << "LibMain() cmplt." << std::endl; } + M2MTransfer::M2MTransfer() : current_chunk(0) {} void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb)