Allocating multiple blocks to one mpi rank in LB #5026
base: python
@@ -96,7 +96,7 @@ class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
     WALBERLA_ASSERT_EQUAL(bSize, buf_size);
 #endif

-    auto const offset = std::get<0>(m_lattice->get_local_grid_range());
+    auto const offset = to_vector3i(receiver->getAABB().min());
     typename Boundary_T::value_type value;
     for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
       if (isFlagSet(it, boundary_flag)) {

Review comment (on the receiver->getAABB().min() line): Wouldn't it be better to have functions for this in the Lattice class, so they can be used by EK as well?
@@ -133,7 +133,7 @@ class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
         << buf_size;
 #endif

-    auto const offset = std::get<0>(m_lattice->get_local_grid_range());
+    auto const offset = to_vector3i(sender->getAABB().min());
     for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
       if (isFlagSet(it, boundary_flag)) {
         auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}};
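One possible shape for the helper the reviewer suggests above, so that the LB and EK pack infos could share the offset computation instead of each querying the block AABB. This is only a sketch: the function name block_offset is hypothetical, and it assumes the to_vector3i conversion and IBlock type already used in this file are in scope.

// Hypothetical shared helper (not part of this PR): lower cell corner of a
// block in global lattice coordinates. With a lattice constant of 1, the
// physical lower corner of the block AABB coincides with the global index
// of the block's first cell, which is exactly the offset the pack info needs.
inline Utils::Vector3i block_offset(IBlock const &block) {
  return to_vector3i(block.getAABB().min());
}

If it lived in the Lattice class or a shared utility header, the two hunks above would reduce to block_offset(*receiver) and block_offset(*sender).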
@@ -40,6 +40,7 @@
 LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
                                  Utils::Vector3i const &node_grid,
+                                 Utils::Vector3i const &block_grid,
                                  unsigned int n_ghost_layers)
     : m_grid_dimensions{grid_dimensions}, m_n_ghost_layers{n_ghost_layers} {
   using walberla::real_t;
@@ -50,21 +51,28 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
       throw std::runtime_error(
           "Lattice grid dimensions and MPI node grid are not compatible.");
     }
+    if (m_grid_dimensions[i] % block_grid[i] != 0) {
+      throw std::runtime_error(
+          "Lattice grid dimensions and block grid are not compatible.");
+    }
   }

   auto constexpr lattice_constant = real_t{1};
-  auto const cells_block = Utils::hadamard_division(grid_dimensions, node_grid);
+  auto const cells_per_block =
+      Utils::hadamard_division(grid_dimensions, block_grid);

   m_blocks = walberla::blockforest::createUniformBlockGrid(
       // number of blocks in each direction
-      uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
+      uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]),
       // number of cells per block in each direction
-      uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
-      lattice_constant,
+      uint_c(cells_per_block[0]), uint_c(cells_per_block[1]),
+      uint_c(cells_per_block[2]), lattice_constant,
       // number of cpus per direction
       uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
       // periodicity
-      true, true, true);
+      true, true, true,
+      // keep global block information
+      false);
   for (IBlock &block : *m_blocks) {
     m_cached_blocks.push_back(&block);
   }

Review comment (on the cells_block line): cells_per_block?
Reply: I changed cells_block to cells_per_block.

Review comment (on "keep global block information"): What does this do/mean?
Reply: If "keep global block information" is true, each process keeps information about remote blocks that reside on other processes.
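To make the relationship between the two grids concrete, here is a minimal usage sketch of the new constructor signature. The numbers are illustrative and not taken from the PR, and it assumes (as the cpus-per-direction argument above suggests) that block_grid is chosen as a per-direction multiple of node_grid so the blocks distribute evenly over the ranks; the hunk itself only shows the divisibility check against grid_dimensions.

// Illustrative values only: a 64^3 lattice on a 2x2x1 MPI node grid, with the
// block grid refined to 2x2x2 so that each rank owns two blocks along z.
auto const grid_dimensions = Utils::Vector3i{{64, 64, 64}};
auto const node_grid = Utils::Vector3i{{2, 2, 1}};
auto const block_grid = Utils::Vector3i{{2, 2, 2}};
auto const n_ghost_layers = 1u;

// Each block then holds hadamard_division(grid_dimensions, block_grid)
// = 32x32x32 cells, and createUniformBlockGrid() spreads the 8 blocks
// over the 4 ranks (2 blocks per rank).
LatticeWalberla lattice(grid_dimensions, node_grid, block_grid, n_ghost_layers);

Passing false for "keep global block information" means each process only stores its own blocks, which keeps memory usage down when remote block metadata is not needed.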
@@ -73,11 +81,19 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
 [[nodiscard]] std::pair<Utils::Vector3d, Utils::Vector3d>
 LatticeWalberla::get_local_domain() const {
   using walberla::to_vector3d;
-  // We only have one block per mpi rank
-  assert(++(m_blocks->begin()) == m_blocks->end());
-
-  auto const ab = m_blocks->begin()->getAABB();
-  return {to_vector3d(ab.min()), to_vector3d(ab.max())};
+  // Get upper and lower corner of BlockForest assigned to a mpi rank.
+  // Since we can allocate multiple blocks per mpi rank,
+  // the corners of all Blocks are compared.
+  auto aa = to_vector3d(m_blocks->begin()->getAABB().min());
+  auto bb = to_vector3d(m_blocks->begin()->getAABB().max());
+  for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
+    auto cc = b->getAABB();
+    for (auto const i : {0u, 1u, 2u}) {
+      aa[i] = std::min(aa[i], cc.min()[i]);
+      bb[i] = std::max(bb[i], cc.max()[i]);
+    }
+  }
+  return {aa, bb};
 }

 [[nodiscard]] bool

Review comment (on get_local_domain): Are we sure that there is no function in BlockForest or UniformBlockForest which already supplies the answer to this?
Reply: In my understanding, a BlockForest knows the domain size of the whole space, and each block knows its own domain range within that space, but no class knows the domain range owned by one MPI rank. I therefore rewrote the code here to compute it compactly.
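To illustrate the reduction the new get_local_domain() performs, here is a self-contained sketch using plain std::array boxes instead of walberla AABBs; the types, values, and the function name local_domain are illustrative, not code from the PR. Note that the result is a bounding box, so it equals the true local domain only if the blocks owned by one rank tile a rectangular region.

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

using Vec3 = std::array<double, 3>;
using Box = std::pair<Vec3, Vec3>; // {lower corner, upper corner}

// Same reduction as the new get_local_domain(): the per-rank domain is the
// component-wise min/max over the corners of all blocks owned by that rank.
static Box local_domain(std::vector<Box> const &blocks) {
  auto lower = blocks.front().first;
  auto upper = blocks.front().second;
  for (auto const &b : blocks) {
    for (std::size_t i = 0u; i < 3u; ++i) {
      lower[i] = std::min(lower[i], b.first[i]);
      upper[i] = std::max(upper[i], b.second[i]);
    }
  }
  return {lower, upper};
}

int main() {
  // Two 32x32x32 blocks stacked along z, as one rank might own when the
  // block grid doubles the node grid in that direction (illustrative values).
  std::vector<Box> const blocks = {Box{Vec3{0., 0., 0.}, Vec3{32., 32., 32.}},
                                   Box{Vec3{0., 0., 32.}, Vec3{32., 32., 64.}}};
  auto const [lo, hi] = local_domain(blocks);
  std::printf("local domain: [%g %g %g] to [%g %g %g]\n", lo[0], lo[1], lo[2],
              hi[0], hi[1], hi[2]);
}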
Review comment: "Using more than one block per MPI rank is not supported for GPU LB" (but why, actually?)
Reply: How about "GPU LB only uses 1 block per MPI rank"?
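For reference, a hedged sketch of the guard this thread discusses; the function name, its location, and the get_cached_blocks() accessor are assumptions, not code from the PR. The idea is simply to count the blocks owned by the local rank (the cached block list filled in the constructor above) and refuse to build the GPU LB if there is more than one.

#include <stdexcept>

// Sketch only: reject multi-block ranks before allocating GPU fields.
template <typename Lattice>
void assert_single_block_per_rank(Lattice const &lattice) {
  // The cached block list (m_cached_blocks in the diff above) is assumed to
  // hold exactly the blocks owned by the local MPI rank.
  if (lattice.get_cached_blocks().size() > 1u) {
    throw std::runtime_error(
        "Using more than one block per MPI rank is not supported for GPU LB");
  }
}

A GPU LB factory or constructor could call assert_single_block_per_rank(lattice) up front, which would produce the error message quoted in the review comment.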