Skip to content

Commit 7ad0534

Browse files
Remove MPI static globals (#4858)
Fixes #4856

Description of changes:
- fix multiple bugs caused by undefined behavior due to the static initialization order of MPI global objects
- ESPResSo is now compatible with Boost 1.84+
2 parents 2e7b11b + 5776a20 commit 7ad0534

33 files changed

+208
-112
lines changed

.github/workflows/push_pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ permissions:
1010
jobs:
1111
macos:
1212
runs-on: macos-12
13-
if: false
13+
if: ${{ github.repository == 'espressomd/espresso' }}
1414
steps:
1515
- name: Checkout
1616
uses: actions/checkout@main

CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,6 @@ if(ESPRESSO_BUILD_TESTS)
435435
endif()
436436

437437
find_package(Boost 1.74.0 REQUIRED ${BOOST_COMPONENTS})
438-
if(${Boost_VERSION} VERSION_GREATER_EQUAL 1.84.0)
439-
message(FATAL_ERROR "Boost version ${Boost_VERSION} is unsupported.")
440-
endif()
441438

442439
#
443440
# Paths

src/core/MpiCallbacks.hpp

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
#include <boost/mpi/collectives/broadcast.hpp>
4343
#include <boost/mpi/communicator.hpp>
44+
#include <boost/mpi/environment.hpp>
4445
#include <boost/mpi/packed_iarchive.hpp>
4546
#include <boost/range/algorithm/remove_if.hpp>
4647

@@ -201,8 +202,8 @@ class MpiCallbacks {
201202
template <typename F, class = std::enable_if_t<std::is_same_v<
202203
typename detail::functor_types<F>::argument_types,
203204
std::tuple<Args...>>>>
204-
CallbackHandle(MpiCallbacks *cb, F &&f)
205-
: m_id(cb->add(std::forward<F>(f))), m_cb(cb) {}
205+
CallbackHandle(std::shared_ptr<MpiCallbacks> cb, F &&f)
206+
: m_id(cb->add(std::forward<F>(f))), m_cb(std::move(cb)) {}
206207

207208
CallbackHandle(CallbackHandle const &) = delete;
208209
CallbackHandle(CallbackHandle &&rhs) noexcept = default;
@@ -211,7 +212,7 @@ class MpiCallbacks {
211212

212213
private:
213214
int m_id;
214-
MpiCallbacks *m_cb;
215+
std::shared_ptr<MpiCallbacks> m_cb;
215216

216217
public:
217218
/**
@@ -237,7 +238,6 @@ class MpiCallbacks {
237238
m_cb->remove(m_id);
238239
}
239240

240-
MpiCallbacks *cb() const { return m_cb; }
241241
int id() const { return m_id; }
242242
};
243243

@@ -255,9 +255,9 @@ class MpiCallbacks {
255255
}
256256

257257
public:
258-
explicit MpiCallbacks(boost::mpi::communicator comm,
259-
bool abort_on_exit = true)
260-
: m_abort_on_exit(abort_on_exit), m_comm(std::move(comm)) {
258+
MpiCallbacks(boost::mpi::communicator comm,
259+
std::shared_ptr<boost::mpi::environment> mpi_env)
260+
: m_comm(std::move(comm)), m_mpi_env(std::move(mpi_env)) {
261261
/* Add a dummy at id 0 for loop abort. */
262262
m_callback_map.add(nullptr);
263263

@@ -268,7 +268,7 @@ class MpiCallbacks {
268268

269269
~MpiCallbacks() {
270270
/* Release the clients on exit */
271-
if (m_abort_on_exit && (m_comm.rank() == 0)) {
271+
if (m_comm.rank() == 0) {
272272
try {
273273
abort_loop();
274274
} catch (...) {
@@ -447,22 +447,25 @@ class MpiCallbacks {
447447
*/
448448
boost::mpi::communicator const &comm() const { return m_comm; }
449449

450+
std::shared_ptr<boost::mpi::environment> share_mpi_env() const {
451+
return m_mpi_env;
452+
}
453+
450454
private:
451455
/**
452456
* @brief Id for the @ref abort_loop. Has to be 0.
453457
*/
454-
enum { LOOP_ABORT = 0 };
458+
static constexpr int LOOP_ABORT = 0;
455459

456460
/**
457-
* @brief If @ref abort_loop should be called on destruction
458-
* on the head node.
461+
* The MPI communicator used for the callbacks.
459462
*/
460-
bool m_abort_on_exit;
463+
boost::mpi::communicator m_comm;
461464

462465
/**
463-
* The MPI communicator used for the callbacks.
466+
* The MPI environment used for the callbacks.
464467
*/
465-
boost::mpi::communicator m_comm;
468+
std::shared_ptr<boost::mpi::environment> m_mpi_env;
466469

467470
/**
468471
* Internal storage for the callback functions.

src/core/communication.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
#include <boost/mpi.hpp>
3737
#include <boost/mpi/communicator.hpp>
38+
#include <boost/mpi/environment.hpp>
3839

3940
#include <mpi.h>
4041

@@ -47,17 +48,20 @@ boost::mpi::communicator comm_cart;
4748
Communicator communicator{};
4849

4950
namespace Communication {
50-
static auto const &mpi_datatype_cache =
51-
boost::mpi::detail::mpi_datatype_cache();
52-
static std::shared_ptr<boost::mpi::environment> mpi_env;
53-
static std::unique_ptr<MpiCallbacks> m_callbacks;
51+
static std::shared_ptr<MpiCallbacks> m_callbacks;
5452

5553
/* We use a singleton callback class for now. */
5654
MpiCallbacks &mpiCallbacks() {
5755
assert(m_callbacks && "Mpi not initialized!");
5856

5957
return *m_callbacks;
6058
}
59+
60+
std::shared_ptr<MpiCallbacks> mpiCallbacksHandle() {
61+
assert(m_callbacks && "Mpi not initialized!");
62+
63+
return m_callbacks;
64+
}
6165
} // namespace Communication
6266

6367
using Communication::mpiCallbacks;
@@ -66,14 +70,12 @@ int this_node = -1;
6670

6771
namespace Communication {
6872
void init(std::shared_ptr<boost::mpi::environment> mpi_env) {
69-
Communication::mpi_env = std::move(mpi_env);
70-
7173
communicator.full_initialization();
7274

7375
Communication::m_callbacks =
74-
std::make_unique<Communication::MpiCallbacks>(comm_cart);
76+
std::make_shared<Communication::MpiCallbacks>(comm_cart, mpi_env);
7577

76-
ErrorHandling::init_error_handling(mpiCallbacks());
78+
ErrorHandling::init_error_handling(Communication::m_callbacks);
7779

7880
#ifdef WALBERLA
7981
walberla::mpi_init();
@@ -83,6 +85,8 @@ void init(std::shared_ptr<boost::mpi::environment> mpi_env) {
8385
cuda_on_program_start();
8486
#endif
8587
}
88+
89+
void deinit() { Communication::m_callbacks.reset(); }
8690
} // namespace Communication
8791

8892
Communicator::Communicator()

src/core/communication.hpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ namespace Communication {
8383
* @brief Returns a reference to the global callback class instance.
8484
*/
8585
MpiCallbacks &mpiCallbacks();
86+
std::shared_ptr<MpiCallbacks> mpiCallbacksHandle();
8687
} // namespace Communication
8788

8889
/**************************************************
@@ -124,12 +125,18 @@ namespace Communication {
124125
/**
125126
* @brief Init globals for communication.
126127
*
127-
* and calls @ref cuda_on_program_start. Keeps a copy of
128-
* the pointer to the mpi environment to keep it alive
129-
* while the program is loaded.
130-
*
131128
* @param mpi_env MPI environment that should be used
132129
*/
133130
void init(std::shared_ptr<boost::mpi::environment> mpi_env);
131+
void deinit();
134132
} // namespace Communication
133+
134+
struct MpiContainerUnitTest {
135+
std::shared_ptr<boost::mpi::environment> m_mpi_env;
136+
MpiContainerUnitTest(int argc, char **argv) {
137+
m_mpi_env = mpi_init(argc, argv);
138+
Communication::init(m_mpi_env);
139+
}
140+
~MpiContainerUnitTest() { Communication::deinit(); }
141+
};
135142
#endif

src/core/errorhandling.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <functional>
3535
#include <memory>
3636
#include <string>
37+
#include <utility>
3738
#include <vector>
3839

3940
namespace ErrorHandling {
@@ -44,13 +45,13 @@ namespace ErrorHandling {
4445
static std::unique_ptr<RuntimeErrorCollector> runtimeErrorCollector;
4546

4647
/** The callback loop we are on. */
47-
static Communication::MpiCallbacks *m_callbacks = nullptr;
48+
static std::weak_ptr<Communication::MpiCallbacks> m_callbacks;
4849

49-
void init_error_handling(Communication::MpiCallbacks &cb) {
50-
m_callbacks = &cb;
50+
void init_error_handling(std::weak_ptr<Communication::MpiCallbacks> callbacks) {
51+
m_callbacks = std::move(callbacks);
5152

5253
runtimeErrorCollector =
53-
std::make_unique<RuntimeErrorCollector>(m_callbacks->comm());
54+
std::make_unique<RuntimeErrorCollector>(m_callbacks.lock()->comm());
5455
}
5556

5657
RuntimeErrorStream _runtimeMessageStream(RuntimeError::ErrorLevel level,
@@ -67,7 +68,7 @@ static void mpi_gather_runtime_errors_local() {
6768
REGISTER_CALLBACK(mpi_gather_runtime_errors_local)
6869

6970
std::vector<RuntimeError> mpi_gather_runtime_errors() {
70-
m_callbacks->call(mpi_gather_runtime_errors_local);
71+
m_callbacks.lock()->call(mpi_gather_runtime_errors_local);
7172
return runtimeErrorCollector->gather();
7273
}
7374

@@ -81,7 +82,7 @@ std::vector<RuntimeError> mpi_gather_runtime_errors_all(bool is_head_node) {
8182
} // namespace ErrorHandling
8283

8384
void errexit() {
84-
ErrorHandling::m_callbacks->comm().abort(1);
85+
ErrorHandling::m_callbacks.lock()->comm().abort(1);
8586

8687
std::abort();
8788
}

src/core/errorhandling.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "error_handling/RuntimeError.hpp"
3232
#include "error_handling/RuntimeErrorStream.hpp"
3333

34+
#include <memory>
3435
#include <string>
3536
#include <vector>
3637

@@ -85,7 +86,7 @@ namespace ErrorHandling {
8586
*
8687
* @param callbacks Callbacks system the error handler should be on.
8788
*/
88-
void init_error_handling(Communication::MpiCallbacks &callbacks);
89+
void init_error_handling(std::weak_ptr<Communication::MpiCallbacks> callbacks);
8990

9091
RuntimeErrorStream _runtimeMessageStream(RuntimeError::ErrorLevel level,
9192
const std::string &file, int line,

src/core/reaction_methods/tests/ReactionAlgorithm_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ BOOST_FIXTURE_TEST_CASE(ReactionAlgorithm_test, ParticleFactory) {
330330
}
331331

332332
int main(int argc, char **argv) {
333-
mpi_init_stand_alone(argc, argv);
333+
auto const mpi_handle = MpiContainerUnitTest(argc, argv);
334334
espresso::system = System::System::create();
335335
espresso::system->set_cell_structure_topology(CellStructureType::REGULAR);
336336
::System::set_system(espresso::system);

src/core/system/System.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -463,10 +463,3 @@ unsigned System::get_global_ghost_flags() const {
463463
}
464464

465465
} // namespace System
466-
467-
void mpi_init_stand_alone(int argc, char **argv) {
468-
auto mpi_env = mpi_init(argc, argv);
469-
470-
// initialize the MpiCallbacks framework
471-
Communication::init(mpi_env);
472-
}

src/core/system/System.hpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -308,10 +308,3 @@ void reset_system();
308308
bool is_system_set();
309309

310310
} // namespace System
311-
312-
/**
313-
* @brief Initialize MPI global state to run ESPResSo in stand-alone mode.
314-
* Use this function in simulations written in C++, such as unit tests.
315-
* The script interface has its own MPI initialization mechanism.
316-
*/
317-
void mpi_init_stand_alone(int argc, char **argv);

src/core/unit_tests/EspressoSystemStandAlone_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) {
483483
}
484484

485485
int main(int argc, char **argv) {
486-
mpi_init_stand_alone(argc, argv);
486+
auto const mpi_handle = MpiContainerUnitTest(argc, argv);
487487
espresso::system = System::System::create();
488488
espresso::system->set_cell_structure_topology(CellStructureType::REGULAR);
489489
::System::set_system(espresso::system);

src/core/unit_tests/EspressoSystem_test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ BOOST_FIXTURE_TEST_CASE(check_with_gpu, ParticleFactory,
151151
}
152152

153153
int main(int argc, char **argv) {
154-
mpi_init_stand_alone(argc, argv);
154+
auto const mpi_handle = MpiContainerUnitTest(argc, argv);
155155

156156
return boost::unit_test::unit_test_main(init_unit_test, argc, argv);
157157
}

0 commit comments

Comments
 (0)