Skip to content

Commit a55c6bf

Browse files
committed
Responding to Reviews
1 parent 281abc2 commit a55c6bf

File tree

11 files changed

+457
-479
lines changed

11 files changed

+457
-479
lines changed

maintainer/benchmarks/lb.py

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,9 @@
5050
parser.add_argument("--output", metavar="FILEPATH", action="store",
5151
type=str, required=False, default="benchmarks.csv",
5252
help="Output file (default: benchmarks.csv)")
53-
parser.add_argument("--divided_block", action="store",
54-
type=int, default=1, required=False,
55-
help="blocks^(1/3) per mpi rank")
56-
parser.add_argument("--divided_block_x", action="store",
57-
type=int, default=0, required=False,
58-
help="The number of divided blocks for x direction")
59-
parser.add_argument("--divided_block_y", action="store",
60-
type=int, default=0, required=False,
61-
help="The number of divided blocks for x direction")
62-
parser.add_argument("--divided_block_z", action="store",
63-
type=int, default=0, required=False,
64-
help="The number of divided blocks for x direction")
53+
parser.add_argument("--blocks_per_mpi_rank", action="store", nargs=3,
54+
type=int, default=[1, 1, 1], required=False,
55+
help="blocks per mpi rank")
6556

6657
args = parser.parse_args()
6758

@@ -97,10 +88,7 @@
9788
n_proc = system.cell_system.get_state()["n_nodes"]
9889
n_part = n_proc * args.particles_per_core
9990
if n_part == 0:
100-
if len(args.box_l) == 1:
101-
box_l = 3 * args.box_l
102-
elif len(args.box_l) == 3:
103-
box_l = args.box_l
91+
box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
10492
agrid = 1.
10593
lb_grid = box_l
10694
measurement_steps = 80
@@ -116,21 +104,15 @@
116104
lb_grid = 3 * [lb_grid]
117105
box_l = 3 * [box_l]
118106

119-
divided_block_x = args.divided_block_x
120-
divided_block_y = args.divided_block_y
121-
divided_block_z = args.divided_block_z
122-
if divided_block_x != 0 and divided_block_y != 0 and divided_block_z != 0:
123-
blocks_per_mpi_rank = [divided_block_x,
124-
divided_block_y, divided_block_z]
125-
else:
126-
divided_block = args.divided_block
127-
blocks_per_mpi_rank = [divided_block] * 3
107+
print(f"box length: {box_l}")
108+
print(f"LB shape: {lb_grid}")
109+
print(f"LB agrid: {agrid:.3f}")
110+
111+
blocks_per_mpi_rank = args.blocks_per_mpi_rank
128112

129113
# System
130114
#############################################################
131-
system.box_l = box_l * system.cell_system.node_grid
132-
print(f"LB agrid: {agrid:.3f}")
133-
print("LB shape", system.box_l)
115+
system.box_l = box_l
134116

135117
# Integration parameters
136118
#############################################################
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#
2+
# Copyright (C) 2013-2022 The ESPResSo project
3+
#
4+
# This file is part of ESPResSo.
5+
#
6+
# ESPResSo is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# ESPResSo is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
#
19+
20+
"""
21+
Benchmark Lattice-Boltzmann fluid + Lennard-Jones particles.
22+
"""
23+
import espressomd
24+
import espressomd.lb
25+
import benchmarks
26+
import numpy as np
27+
import argparse
28+
29+
parser = argparse.ArgumentParser(description="Benchmark LB simulations. "
30+
"Save the results to a CSV file.")
31+
parser.add_argument("--particles_per_core", metavar="N", action="store",
32+
type=int, default=125, required=False,
33+
help="Number of particles per core")
34+
parser.add_argument("--box_l", action="store", nargs="+",
35+
type=int, default=argparse.SUPPRESS, required=False,
36+
help="Box length (cubic box)")
37+
parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
38+
type=float, default=28, required=False,
39+
help="Number of LB sites per particle")
40+
parser.add_argument("--volume_fraction", metavar="FRAC", action="store",
41+
type=float, default=0.03, required=False,
42+
help="Fraction of the simulation box volume occupied by "
43+
"particles (range: [0.01-0.74], default: 0.03)")
44+
parser.add_argument("--single_precision", action="store_true", required=False,
45+
help="Using single-precision floating point accuracy")
46+
parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
47+
default=False, required=False, help="Use GPU implementation")
48+
parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
49+
default=False, required=False, help="Use multi-GPU implementation")
50+
parser.add_argument("--output", metavar="FILEPATH", action="store",
51+
type=str, required=False, default="benchmarks.csv",
52+
help="Output file (default: benchmarks.csv)")
53+
parser.add_argument("--blocks_per_mpi_rank", action="store", nargs=3,
54+
type=int, default=[1, 1, 1], required=False,
55+
help="blocks per mpi rank")
56+
57+
args = parser.parse_args()
58+
59+
# process and check arguments
60+
n_iterations = 30
61+
assert args.volume_fraction > 0, "--volume_fraction must be a positive number"
62+
assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \
63+
"--volume_fraction exceeds the physical limit of sphere packing (~0.74)"
64+
assert "box_l" not in args or args.particles_per_core == 0, \
65+
"Argument --box_l requires --particles_per_core=0"
66+
67+
required_features = ["LENNARD_JONES", "WALBERLA"]
68+
if args.gpu:
69+
required_features.append("CUDA")
70+
espressomd.assert_features(required_features)
71+
72+
# make simulation deterministic
73+
np.random.seed(42)
74+
75+
# System
76+
#############################################################
77+
system = espressomd.System(box_l=[1, 1, 1])
78+
79+
# Interaction parameters (Lennard-Jones)
80+
#############################################################
81+
82+
lj_eps = 1.0 # LJ epsilon
83+
lj_sig = 1.0 # particle diameter
84+
lj_cut = lj_sig * 2**(1. / 6.) # cutoff distance
85+
86+
# System parameters
87+
#############################################################
88+
n_proc = system.cell_system.get_state()["n_nodes"]
89+
n_part = n_proc * args.particles_per_core
90+
if n_part == 0:
91+
box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
92+
agrid = 1.
93+
lb_grid = box_l
94+
measurement_steps = 80
95+
else:
96+
# volume of N spheres with radius r: N * (4/3*pi*r^3)
97+
box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
98+
/ args.volume_fraction)**(1. / 3.)
99+
lb_grid = (n_part * args.lb_sites_per_particle)**(1. / 3.)
100+
lb_grid = int(2. * round(lb_grid / 2.))
101+
agrid = box_l / lb_grid
102+
measurement_steps = max(50, int(120**3 / lb_grid**3))
103+
measurement_steps = 40
104+
lb_grid = 3 * [lb_grid]
105+
box_l = 3 * [box_l]
106+
107+
blocks_per_mpi_rank = args.blocks_per_mpi_rank
108+
109+
# System
110+
#############################################################
111+
system.box_l = box_l * system.cell_system.node_grid
112+
print(f"box length: {system.box_l}")
113+
print(f"LB shape: {lb_grid}")
114+
print(f"LB agrid: {agrid:.3f}")
115+
116+
# Integration parameters
117+
#############################################################
118+
system.time_step = 0.01
119+
system.cell_system.skin = 0.5
120+
121+
# Interaction and particle setup
122+
#############################################################
123+
if n_part:
124+
system.non_bonded_inter[0, 0].lennard_jones.set_params(
125+
epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto")
126+
system.part.add(pos=np.random.random((n_part, 3)) * system.box_l)
127+
benchmarks.minimize(system, n_part / 2.)
128+
system.integrator.set_vv()
129+
system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)
130+
131+
# tuning and equilibration
132+
min_skin = 0.2
133+
max_skin = 1.0
134+
print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
135+
min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
136+
print("Equilibration")
137+
system.integrator.run(500)
138+
print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
139+
min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100)))
140+
print("Equilibration")
141+
system.integrator.run(500)
142+
system.thermostat.turn_off()
143+
144+
# LB fluid setup
145+
#############################################################
146+
lb_class = espressomd.lb.LBFluidWalberla
147+
if args.gpu or args.multi_gpu:
148+
lb_class = espressomd.lb.LBFluidWalberlaGPU
149+
if args.multi_gpu:
150+
system.cuda_init_handle.call_method("set_device_id_per_rank")
151+
lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
152+
density=1., single_precision=args.single_precision, blocks_per_mpi_rank=blocks_per_mpi_rank)
153+
system.lb = lbf
154+
if n_part:
155+
system.thermostat.set_lb(LB_fluid=lbf, gamma=1., seed=42)
156+
157+
158+
# time integration loop
159+
timings = benchmarks.get_timings(system, measurement_steps, n_iterations)
160+
161+
# average time
162+
avg, ci = benchmarks.get_average_time(timings)
163+
print(f"average: {1000 * avg:.2f} +/- {1000 * ci:.2f} ms (95% C.I.)")
164+
165+
# write report
166+
benchmarks.write_report(args.output, n_proc, timings, measurement_steps)

src/script_interface/walberla/LBFluid.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ void LBFluidGPU::make_instance(VariantMap const &params) {
143143
params, "blocks_per_mpi_rank", Utils::Vector3i{{1, 1, 1}});
144144
if (blocks_per_mpi_rank != Utils::Vector3i{{1, 1, 1}}) {
145145
throw std::runtime_error(
146-
"GPU architecture PROHIBITED allocating many blocks to 1 CPU.");
146+
"Using more than one block per MPI rank is not supported for GPU LB");
147147
}
148148
auto const lb_lattice = m_lattice->lattice();
149149
auto const lb_visc = m_conv_visc * visc;

src/walberla_bridge/src/LatticeWalberla.cpp

Lines changed: 8 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,15 @@ LatticeWalberla::LatticeWalberla(Utils::Vector3i const &grid_dimensions,
5858
}
5959

6060
auto constexpr lattice_constant = real_t{1};
61-
auto const cells_block =
61+
auto const cells_per_block =
6262
Utils::hadamard_division(grid_dimensions, block_grid);
6363

6464
m_blocks = walberla::blockforest::createUniformBlockGrid(
6565
// number of blocks in each direction
6666
uint_c(block_grid[0]), uint_c(block_grid[1]), uint_c(block_grid[2]),
6767
// number of cells per block in each direction
68-
uint_c(cells_block[0]), uint_c(cells_block[1]), uint_c(cells_block[2]),
69-
lattice_constant,
68+
uint_c(cells_per_block[0]), uint_c(cells_per_block[1]),
69+
uint_c(cells_per_block[2]), lattice_constant,
7070
// number of cpus per direction
7171
uint_c(node_grid[0]), uint_c(node_grid[1]), uint_c(node_grid[2]),
7272
// periodicity
@@ -84,41 +84,16 @@ LatticeWalberla::get_local_domain() const {
8484
// Get upper and lower corner of BlockForest assigned to a mpi rank.
8585
// Since we can allocate multiple blocks per mpi rank,
8686
// the corners of all Blocks are compared.
87-
int64_t const stride_y = m_grid_dimensions[2];
88-
int64_t const stride_x = m_grid_dimensions[1] * stride_y;
89-
auto aa = m_blocks->begin()->getAABB();
90-
auto bb = m_blocks->begin()->getAABB();
91-
int64_t aa_index = stride_x * static_cast<int>(aa.min()[0]) +
92-
stride_y * static_cast<int>(aa.min()[1]) +
93-
static_cast<int>(aa.min()[2]);
94-
int64_t bb_index = stride_x * static_cast<int>(bb.max()[0]) +
95-
stride_y * static_cast<int>(bb.max()[1]) +
96-
static_cast<int>(bb.max()[2]);
87+
auto aa = to_vector3d(m_blocks->begin()->getAABB().min());
88+
auto bb = to_vector3d(m_blocks->begin()->getAABB().max());
9789
for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
9890
auto cc = b->getAABB();
9991
for (auto const i : {0u, 1u, 2u}) {
100-
if ((cc.max()[i] - cc.min()[i]) != 0) {
101-
assert(m_grid_dimensions[i] %
102-
static_cast<int>(cc.max()[i] - cc.min()[i]) ==
103-
0);
104-
}
105-
}
106-
int64_t min_index = stride_x * static_cast<int>(cc.min()[0]) +
107-
stride_y * static_cast<int>(cc.min()[1]) +
108-
static_cast<int>(cc.min()[2]);
109-
int64_t max_index = stride_x * static_cast<int>(cc.max()[0]) +
110-
stride_y * static_cast<int>(cc.max()[1]) +
111-
static_cast<int>(cc.max()[2]);
112-
if (min_index < aa_index) {
113-
aa = cc;
114-
aa_index = min_index;
115-
}
116-
if (max_index > bb_index) {
117-
bb = cc;
118-
bb_index = max_index;
92+
aa[i] = std::min(aa[i], cc.min()[i]);
93+
bb[i] = std::max(bb[i], cc.max()[i]);
11994
}
12095
}
121-
return {to_vector3d(aa.min()), to_vector3d(bb.max())};
96+
return {aa, bb};
12297
}
12398

12499
[[nodiscard]] bool

0 commit comments

Comments
 (0)