diff --git a/src/Control/Inciter/CmdLine/CmdLine.hpp b/src/Control/Inciter/CmdLine/CmdLine.hpp
index 0c0ef976e1e..b2f804cc6c3 100644
--- a/src/Control/Inciter/CmdLine/CmdLine.hpp
+++ b/src/Control/Inciter/CmdLine/CmdLine.hpp
@@ -169,7 +169,29 @@ class CmdLine : public tk::TaggedTuple< CmdLineMembers > {
     ///@{
     //! \brief Pack/Unpack serialize member function
     //! \param[in,out] p Charm++'s PUP::er serializer object reference
-    void pup( PUP::er& p ) { tk::TaggedTuple< CmdLineMembers >::pup(p); }
+    //! \details cmdinfo, ctrinfo, and helpkw are intentionally skipped: they
+    //!   are always reconstructed identically by the constructors from
+    //!   compile-time string literals and must not be checkpointed, as their
+    //!   large size (MB of help text) can corrupt the PUP stream offset.
+    void pup( PUP::er& p ) {
+      p | get< tag::io >();
+      p | get< tag::virtualization >();
+      p | get< tag::verbose >();
+      p | get< tag::chare >();
+      p | get< tag::nonblocking >();
+      p | get< tag::benchmark >();
+      p | get< tag::feedback >();
+      p | get< tag::help >();
+      p | get< tag::helpctr >();
+      p | get< tag::quiescence >();
+      p | get< tag::trace >();
+      p | get< tag::version >();
+      p | get< tag::license >();
+      p | get< tag::error >();
+      p | get< tag::lbfreq >();
+      p | get< tag::rsfreq >();
+      // tag::cmdinfo, tag::ctrinfo, tag::helpkw intentionally not PUP'd
+    }
     //! \brief Pack/Unpack serialize operator|
     //! \param[in,out] p Charm++'s PUP::er serializer object reference
     //! \param[in,out] c CmdLine object reference
diff --git a/src/Inciter/Discretization.cpp b/src/Inciter/Discretization.cpp
index e6304c7b80c..f8017598071 100644
--- a/src/Inciter/Discretization.cpp
+++ b/src/Inciter/Discretization.cpp
@@ -35,6 +35,12 @@ extern ctr::InputDeck g_inputdeck_defaults;
 
 } // inciter::
 
+namespace exam2m {
+
+extern CollideHandle collideHandle;
+
+} // exam2m::
+
 using inciter::Discretization;
 
 Discretization::Discretization(
@@ -175,13 +181,55 @@ Discretization::Discretization(
     if (thisIndex == 0) {
       exam2m::addMesh( thisProxy, m_nchare,
         CkCallback( CkIndex_Discretization::transferInit(), thisProxy ) );
       //std::cout << "Disc: " << m_meshid << " m2m::addMesh()\n";
     }
   }
   // Array elements must not use the chare_objs table
   chareIdx = -1;
 }
 
+void
+Discretization::addRestartedMesh( CkCallback cb )
+// *****************************************************************************
+// Register mesh with mesh-transfer lib on restart
+//! \param[in] cb Callback to call when mesh-registration is complete.
+//! \details Expected to be called by every array element, since both
+//!   branches below contribute to a reduction over the whole array.
+// *****************************************************************************
+{
+  if (m_disc.size() == 1 || m_transfer.empty()) {
+    // skip transfer if single mesh or if not involved in coupling. Invoke the
+    // callback via a reduction so that it fires once for the whole array, as
+    // in the coupled branch. Calling cb.send() from every element would fire
+    // a callback that targets a whole array once per element.
+    contribute( cb );
+  } else {
+    // Store the callback so collideRestartDone() (element 0) can forward it
+    // to addMesh once all PEs have finished reinitClient.
+    m_restartcb = cb;
+    // Reinitialize the collision client on this PE's collideMgr branch.
+    // Contribute to a barrier so ALL PEs finish reinitClient before element 0
+    // calls addMesh. Without this, collideMgr on some PEs may not yet be
+    // restored from checkpoint when the first collision messages arrive,
+    // causing "group proxy not initialized" (CmiAbort / SIGSEGV on restart).
+    CollideSerialClientRestart(exam2m::collideHandle, exam2m::collisionHandler,
+      0);
+    contribute( CkCallback(CkIndex_Discretization::collideRestartDone(), thisProxy) );
+  }
+}
+
+void
+Discretization::collideRestartDone()
+// *****************************************************************************
+// Register mesh once every chare has finished CollideSerialClientRestart
+//! \details Broadcast to all array elements; only element 0 registers.
+// *****************************************************************************
+{
+  if (thisIndex == 0) {
+    exam2m::addMesh( thisProxy, m_nchare, m_restartcb );
+  }
+}
+
 std::unordered_map< std::size_t, std::size_t >
 Discretization::genBid()
 // *****************************************************************************
diff --git a/src/Inciter/Discretization.hpp b/src/Inciter/Discretization.hpp
index 1bdf472f8db..c965022dfc0 100644
--- a/src/Inciter/Discretization.hpp
+++ b/src/Inciter/Discretization.hpp
@@ -93,6 +93,12 @@ class Discretization : public CBase_Discretization {
     //! Configure Charm++ reduction types
     static void registerReducers();
 
+    //! Register mesh with mesh-transfer lib after restart from checkpoint
+    void addRestartedMesh( CkCallback cb );
+
+    //! Barrier target: all PEs done with CollideSerialClientRestart
+    void collideRestartDone();
+
     //! Start computing new mesh veloctity for ALE mesh motion
     void meshvelStart(
       const tk::UnsMesh::Coords vel,
@@ -514,6 +520,8 @@ class Discretization : public CBase_Discretization {
     //! \brief Charm++ callback of the function to call after a mesh-to-mesh
     //!   solution transfer (to-and-fro) is complete
     CkCallback m_transfer_complete;
+    //! Callback stored during addRestartedMesh, forwarded by collideRestartDone
+    CkCallback m_restartcb;
     //! Solution/mesh transfer (coupling) information coordination propagation
     //! \details This has the same size with the same src/dst information on
     //!   all solvers.
diff --git a/src/Inciter/OversetFE.cpp b/src/Inciter/OversetFE.cpp
index 3d652f37d86..713860fb686 100644
--- a/src/Inciter/OversetFE.cpp
+++ b/src/Inciter/OversetFE.cpp
@@ -1677,7 +1677,7 @@ OversetFE::out()
 void
 OversetFE::evalLB( int nrestart )
 // *****************************************************************************
-// Evaluate whether to do load balancing
+// Evaluate whether to do load balancing; on restart, re-register mesh first
 //! \param[in] nrestart Number of times restarted
 // *****************************************************************************
 {
@@ -1685,7 +1685,24 @@ OversetFE::evalLB( int nrestart )
 
   // Detect if just returned from a checkpoint and if so, zero timers and
   // finished flag
-  if (d->restarted( nrestart )) m_finished = 0;
+  if (d->restarted( nrestart )) {
+    // Re-register the mesh with the mesh-transfer lib first; load balancing
+    // evaluation resumes in continueEvalLB() once registration completes.
+    d->addRestartedMesh(
+      CkCallback(CkIndex_OversetFE::continueEvalLB(), thisProxy) );
+    m_finished = 0;
+  }
+  else
+    continueEvalLB();
+}
+
+void
+OversetFE::continueEvalLB()
+// *****************************************************************************
+// Continue evaluating whether to do load balancing and proceed to next step
+// *****************************************************************************
+{
+  auto d = Disc();
 
   const auto lbfreq = g_inputdeck.get< tag::cmd, tag::lbfreq >();
   const auto nonblocking = g_inputdeck.get< tag::cmd, tag::nonblocking >();
diff --git a/src/Inciter/OversetFE.hpp b/src/Inciter/OversetFE.hpp
index 6e0f1bc6cb2..f915590d2bc 100644
--- a/src/Inciter/OversetFE.hpp
+++ b/src/Inciter/OversetFE.hpp
@@ -157,6 +157,9 @@ class OversetFE : public CBase_OversetFE {
     // Evaluate whether to do load balancing
     void evalLB( int nrestart );
 
+    //! Continue evaluating whether to do load balancing and proceed to next step
+    void continueEvalLB();
+
     //! Evaluate whether to continue with next time step stage
     void stage();
 
diff --git a/src/Inciter/discretization.ci b/src/Inciter/discretization.ci
index 755f4f7b0d7..e85c7aa8ed9 100644
--- a/src/Inciter/discretization.ci
+++ b/src/Inciter/discretization.ci
@@ -47,6 +47,7 @@ module discretization {
                    const std::vector< tk::real >& nodevol );
       entry void stat( tk::real mesh_volume );
       entry void transferInit();
+      entry void collideRestartDone();
       entry void transfer_complete();
       entry void to_complete();
       entry void from_complete();
diff --git a/src/Inciter/oversetfe.ci b/src/Inciter/oversetfe.ci
index 935a7226594..4ca7778f7ee 100644
--- a/src/Inciter/oversetfe.ci
+++ b/src/Inciter/oversetfe.ci
@@ -62,6 +62,7 @@ module oversetfe {
       entry void next();
       entry void stage();
       entry void evalLB( int nrestart );
+      entry void continueEvalLB();
       //! [Entry methods]
 
       // SDAG code follows. See http://charm.cs.illinois.edu/manuals/html/
diff --git a/src/Transfer/M2MTransfer.cpp b/src/Transfer/M2MTransfer.cpp
index c73e69c72e2..20e1fbe3414 100644
--- a/src/Transfer/M2MTransfer.cpp
+++ b/src/Transfer/M2MTransfer.cpp
@@ -56,11 +56,20 @@ LibMain::LibMain(CkArgMsg* msg) {
   CollideGrid3d gridMap(CkVector3d(0, 0, 0),CkVector3d(2, 100, 2));
   collideHandle = CollideCreate(gridMap,
       CollideSerialClient(collisionHandler, 0));
 }
 
 M2MTransfer::M2MTransfer() : current_chunk(0) {}
 
-void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb) {
+void M2MTransfer::addMesh(CkArrayID p, int elem, CkCallback cb)
+// *****************************************************************************
+// Register mesh with the mesh-to-mesh transfer library
+//! \param[in] p Proxy from which this function call originated
+//! \param[in] elem Total number of chares in the application
+//! \param[in] cb Callback to inform application that the library is ready
+//! \details This function registers a mesh with M2MTransfer. This needs to
+//!   be called during normal execution and when restarting from checkpoint.
+// *****************************************************************************
+{
   auto id = static_cast(CkGroupID(p).idx);
   if (proxyMap.count(id) == 0) {
     CkArrayOptions opts;
diff --git a/src/Transfer/M2MTransfer.hpp b/src/Transfer/M2MTransfer.hpp
index 29397447970..a1aa26d3f94 100644
--- a/src/Transfer/M2MTransfer.hpp
+++ b/src/Transfer/M2MTransfer.hpp
@@ -1,21 +1,32 @@
 // Controller for the library
+#ifndef M2MTransfer_hpp
+#define M2MTransfer_hpp
 
 #include "NoWarning/m2mtransfer.decl.h"
 
 #include "collidecharm.h"
 #include "Fields.hpp"
 
 namespace exam2m {
 
+//! External user interface functions to M2MTransfer
+void collisionHandler( [[maybe_unused]] void *param,
+                       int nColl,
+                       Collision *colls );
 void addMesh(CkArrayID p, int elem, CkCallback cb);
 void setSourceTets(CkArrayID p, int index, std::vector< std::size_t >* inpoel, tk::UnsMesh::Coords* coords, const tk::Fields& u);
 void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords, tk::Fields& u, CkCallback cb);
 
+//! LibMain mainchare that creates collidecharm-proxies at startup
 class LibMain : public CBase_LibMain {
 public:
   LibMain(CkArgMsg* msg);
+  explicit LibMain(CkMigrateMessage* msg) : CBase_LibMain(msg) {}
+  void pup(PUP::er&) {}
+  friend void operator|( PUP::er& p, LibMain& m ) { m.pup(p); }
 };
 
+//! MeshData class that contains the mesh
 class MeshData {
   public:
     CProxy_TransferDetails m_proxy;
@@ -28,6 +39,7 @@ class MeshData {
     }
 };
 
+//! M2MTransfer chare-group which is inciter's interface to collidecharm
 class M2MTransfer : public CBase_M2MTransfer {
   private:
     std::unordered_map proxyMap;
@@ -35,22 +47,40 @@ class M2MTransfer : public CBase_M2MTransfer {
     CmiUInt8 m_sourcemesh, m_destmesh;
 
   public:
+
+    //! Constructor
     M2MTransfer();
+
     #if defined(__clang__)
       #pragma clang diagnostic push
       #pragma clang diagnostic ignored "-Wundefined-func-template"
     #endif
+    //! Migrate constructor
     explicit M2MTransfer( CkMigrateMessage* m ) : CBase_M2MTransfer( m ) {}
     #if defined(__clang__)
       #pragma clang diagnostic pop
     #endif
+
+    //! Register mesh with the mesh-to-mesh transfer library
     void addMesh(CkArrayID p, int elem, CkCallback cb);
+
     void setMesh(CkArrayID p, MeshData d);
     void setSourceTets(CkArrayID p, int index, std::vector< std::size_t >* inpoel,
                        tk::UnsMesh::Coords* coords, const tk::Fields& u);
     void setDestPoints(CkArrayID p, int index, tk::UnsMesh::Coords* coords,
                        tk::Fields& u, CkCallback cb);
     void distributeCollisions(int nColl, Collision* colls);
+
+    //! Pack/Unpack serialize member function for Charm
+    //! \param[in,out] p Charm++'s PUP::er serializer object reference
+    //! \details Registered meshes are not checkpointed: addMesh() is called
+    //!   again after restart. The migrate constructor does not set
+    //!   current_chunk, so reset it on unpack to match the regular constructor.
+    void pup(PUP::er& p) { if (p.isUnpacking()) current_chunk = 0; }
+    //! Pack/Unpack serialize operator| for Charm
+    friend void operator|( PUP::er& p, M2MTransfer& m ) { m.pup(p); }
 };
 
 }
+
+#endif // M2MTransfer_hpp
diff --git a/src/Transfer/TransferDetails.hpp b/src/Transfer/TransferDetails.hpp
index e8f5c6091af..0d5cfc3874f 100644
--- a/src/Transfer/TransferDetails.hpp
+++ b/src/Transfer/TransferDetails.hpp
@@ -80,19 +80,6 @@ class TransferDetails : public CBase_TransferDetails {
     //! Transfer the interpolated solution data back to destination mesh
     void transferSolution( const std::vector< SolutionData >& soln );
 
-    /** @name Charm++ pack/unpack serializer member functions */
-    ///@{
-    //! \brief Pack/Unpack serialize member function
-    //! \param[in,out] p Charm++'s PUP::er serializer object reference
-    void pup( PUP::er &p ) override {
-      p | m_firstchunk;
-    }
-    //! \brief Pack/Unpack serialize operator|
-    //! \param[in,out] p Charm++'s PUP::er serializer object reference
-    //! \param[in,out] i TransferDetails object reference
-    friend void operator|( PUP::er& p, TransferDetails& i ) { i.pup(p); }
-    //@}
-
   private:
     //! The ID of my first chunk (used for collision detection library)
     int m_firstchunk;
diff --git a/src/Transfer/m2mtransfer.ci b/src/Transfer/m2mtransfer.ci
index b9f9b4cee13..24a821ec4f1 100644
--- a/src/Transfer/m2mtransfer.ci
+++ b/src/Transfer/m2mtransfer.ci
@@ -9,7 +9,7 @@ module m2mtransfer {
 
   class MeshData;
 
-  mainchare LibMain {
+  mainchare [migratable] LibMain {
     entry LibMain(CkArgMsg* msg);
   };
 