
Commit 78afc1c

Merge pull request #246 from alexandermote/dev
Non-uniform processor allocation in domain-decomposed simulations
2 parents 847d9d6 + 8327ef3

15 files changed: +465 −70 lines
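
Note on the scheme: dd_work_ratio now holds the number of MPI ranks assigned to each subdomain, so processor allocation no longer has to be uniform. The sketch below (not part of the commit; names are illustrative) shows how such an array maps a global rank to a subdomain index (dd_idx) and a position within its subdomain group (dd_local_rank), mirroring the grouping loop added to kernel.py below.

def rank_to_subdomain(work_ratio, rank):
    # Subdomain n owns the next work_ratio[n] consecutive global ranks.
    i = 0
    for n, n_ranks in enumerate(work_ratio):
        group = list(range(i, i + int(n_ranks)))
        if rank in group:
            # (dd_idx, dd_local_rank) for this rank
            return n, group.index(rank)
        i += int(n_ranks)
    raise ValueError("rank exceeds sum(work_ratio)")

# Example: 6 ranks over 4 subdomains, two ranks each on the middle subdomains.
print(rank_to_subdomain([1, 2, 2, 1], rank=4))  # -> (2, 1)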

mcdc/global_.py

Lines changed: 1 addition & 1 deletion

@@ -84,10 +84,10 @@ def reset(self):
             "ww_mesh": make_card_mesh(),
             "domain_decomposition": False,
             "dd_idx": 0,
+            "dd_local_rank": 0,
             "dd_mesh": make_card_mesh(),
             "dd_exchange_rate": 0,
             "dd_exchange_rate_padding": 0,
-            "dd_repro": False,
             "dd_work_ratio": np.array([1]),
             "weight_roulette": False,
             "wr_threshold": 0.0,

mcdc/input_.py

Lines changed: 2 additions & 7 deletions

@@ -1320,7 +1320,6 @@ def domain_decomposition(
     exchange_rate=100000,
     exchange_rate_padding=None,
     work_ratio=None,
-    repro=True,
 ):
     """
     Activate domain decomposition.
@@ -1347,7 +1346,6 @@ def domain_decomposition(
     card["domain_decomposition"] = True
     card["dd_exchange_rate"] = int(exchange_rate)
     card["dd_exchange_rate_padding"] = exchange_rate_padding
-    card["dd_repro"] = repro
     dom_num = 1
     # Set mesh
     if x is not None:
@@ -1359,11 +1357,8 @@ def domain_decomposition(
     if z is not None:
         card["dd_mesh"]["z"] = z
         dom_num += len(z)
-    # Set work ratio
-    if work_ratio is None:
-        card["dd_work_ratio"] = None
-    elif work_ratio is not None:
-        card["dd_work_ratio"] = work_ratio
+
+    card["dd_work_ratio"] = work_ratio
     card["dd_idx"] = 0
     card["dd_xp_neigh"] = []
     card["dd_xn_neigh"] = []

mcdc/kernel.py

Lines changed: 90 additions & 6 deletions

@@ -702,12 +702,8 @@ def source_particle_dd(seed, mcdc):
 def distribute_work_dd(N, mcdc, precursor=False):
     # Total # of work
     work_size_total = N
-
-    if not mcdc["technique"]["dd_repro"]:
-        work_size, work_start = domain_work(mcdc, mcdc["dd_idx"], N)
-    else:
-        work_start = 0
-        work_size = work_size_total
+    work_start = 0
+    work_size = work_size_total

     if not precursor:
         mcdc["mpi_work_start"] = work_start
@@ -2145,6 +2141,38 @@ def calculate_distance_in_coarse_bin(start, end, distance, center, cs_bin_size):
     return distance_inside


+@njit
+def dd_reduce(data, mcdc):
+    tally_bin = data[TALLY]
+
+    # find number of subdomains
+    d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
+    d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
+    d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1
+
+    with objmode():
+        # assign processors to their subdomain group
+        i = 0
+        for n in range(d_Nx * d_Ny * d_Nz):
+            dd_group = []
+            for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
+                dd_group.append(i)
+                i += 1
+            # create MPI Comm group out of subdomain processors
+            dd_group = MPI.COMM_WORLD.group.Incl(dd_group)
+            dd_comm = MPI.COMM_WORLD.Create(dd_group)
+            # MPI Reduce on subdomain processors
+            buff = np.zeros_like(tally_bin[TALLY_SCORE])
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
+            if mcdc["dd_idx"] == n:
+                tally_bin[TALLY_SCORE][:] = buff
+            # free comm group
+            dd_group.Free()
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Free()
+
+
 @njit
 def tally_reduce(data, mcdc):
     tally_bin = data[TALLY]
@@ -2162,6 +2190,16 @@ def tally_reduce(data, mcdc):
             MPI.COMM_WORLD.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
         tally_bin[TALLY_SCORE][:] = buff

+    else:
+        # find number of subdomains
+        N_dd = 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
+        # DD Reduce if multiple processors per subdomain
+        if N_dd != mcdc["mpi_size"]:
+            dd_reduce(data, mcdc)
+

 @njit
 def tally_accumulate(data, mcdc):
@@ -2178,6 +2216,42 @@ def tally_accumulate(data, mcdc):
         tally_bin[TALLY_SCORE, i] = 0.0


+@njit
+def dd_closeout(data, mcdc):
+    tally_bin = data[TALLY]
+
+    # find number of subdomains
+    d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
+    d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
+    d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1
+
+    with objmode():
+        # assign processors to their subdomain group
+        i = 0
+        for n in range(d_Nx * d_Ny * d_Nz):
+            dd_ranks = []
+            for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
+                dd_ranks.append(i)
+                i += 1
+            # create MPI Comm group out of subdomain processors
+            dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks)
+            dd_comm = MPI.COMM_WORLD.Create(dd_group)
+            # MPI Reduce on subdomain processors
+            buff = np.zeros_like(tally_bin[TALLY_SUM])
+            buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ])
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0)
+                dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0)
+            if mcdc["dd_idx"] == n:
+                tally_bin[TALLY_SUM] = buff
+                tally_bin[TALLY_SUM_SQ] = buff_sq
+
+            # free comm group
+            dd_group.Free()
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Free()
+
+
 @njit
 def tally_closeout(data, mcdc):
     tally = data[TALLY]
@@ -2199,6 +2273,16 @@ def tally_closeout(data, mcdc):
         tally[TALLY_SUM] = buff
         tally[TALLY_SUM_SQ] = buff_sq

+    else:
+        # find number of subdomains
+        N_dd = 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
+        # DD Reduce if multiple processors per subdomain
+        if N_dd != mcdc["mpi_size"]:
+            dd_closeout(data, mcdc)
+        # tally[TALLY_SUM_SQ] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
     # Calculate and store statistics
     # sum --> mean
     # sum_sq --> standard deviation
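
dd_reduce and dd_closeout share one pattern: for each subdomain, build a communicator from that subdomain's consecutive ranks (as listed by dd_work_ratio) and MPI-Reduce the tally arrays onto the group's first rank. A standalone mpi4py sketch of that same pattern, with illustrative names (work_ratio, scores) rather than MC/DC's data layout:

# Run with e.g. `mpiexec -n 6 python sketch.py` (6 == sum(work_ratio)).
import numpy as np
from mpi4py import MPI

work_ratio = [1, 2, 2, 1]   # ranks per subdomain (illustrative)
scores = np.ones(8)         # this rank's tally scores (illustrative)

i = 0
for n, n_ranks in enumerate(work_ratio):
    ranks = list(range(i, i + int(n_ranks)))
    i += int(n_ranks)
    # Communicator containing only subdomain n's ranks
    group = MPI.COMM_WORLD.group.Incl(ranks)
    comm = MPI.COMM_WORLD.Create(group)
    buff = np.zeros_like(scores)
    if comm != MPI.COMM_NULL:
        # Sum the subdomain's copies onto its first rank (root 0 of comm);
        # as in dd_reduce, non-root ranks are left with a zeroed buffer.
        comm.Reduce(scores, buff, MPI.SUM, 0)
        scores[:] = buff
    group.Free()
    if comm != MPI.COMM_NULL:
        comm.Free()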

mcdc/loop.py

Lines changed: 29 additions & 0 deletions

@@ -200,8 +200,36 @@ def generate_source_particle(work_start, idx_work, seed, prog):
     P = P_arr[0]

     # Check if it is beyond current census index
+    hit_census = False
     idx_census = mcdc["idx_census"]
     if P["t"] > mcdc["setting"]["census_time"][idx_census]:
+        hit_census = True
+
+    # Check if particle is in the domain (if decomposed)
+    if mcdc["technique"]["domain_decomposition"]:
+        if not kernel.particle_in_domain(P_arr, mcdc):
+            return
+
+        # Also check if it belongs to the current rank
+        mcdc["dd_N_local_source"] += 1
+        if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 1:
+            if (
+                mcdc["dd_N_local_source"]
+                % mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
+                != mcdc["dd_local_rank"]
+            ):
+                return
+
+    # Put into the bank
+    if hit_census:
+        # TODO: Need a special bank for source particles in the later census indices.
+        # This is needed so that those particles are not prematurely
+        # population controlled.
+        adapt.add_census(P_arr, prog)
+    else:
+        adapt.add_active(P_arr, prog)
+
+    """
         if mcdc["technique"]["domain_decomposition"]:
             if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 0:
                 P["w"] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
@@ -222,6 +250,7 @@ def generate_source_particle(work_start, idx_work, seed, prog):
         else:
            kernel.recordlike_to_particle(P_new_arr, P_arr)
            adapt.add_active(P_new_arr, prog)
+    """


 @njit
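
The rewritten source routine drops the old weight-splitting path (now fenced off in the triple-quoted string): every rank samples the full source, discards particles outside its subdomain, and, when several ranks share a subdomain, keeps only every k-th in-domain particle, where k is that subdomain's dd_work_ratio and the offset is dd_local_rank. A small sketch of just that selection rule, with hypothetical names:

def keeps_particle(n_local_source, work_ratio, local_rank):
    # n_local_source is the running count of in-domain source particles seen
    # by this rank; with work_ratio ranks on the subdomain, local rank r keeps
    # the particles whose count is congruent to r modulo work_ratio.
    if work_ratio <= 1:
        return True
    return n_local_source % work_ratio == local_rank

# Example: 3 ranks share one subdomain; in-domain particles are dealt out
# round-robin (counts 1 and 4 to local rank 1, 2 and 5 to rank 2, 3 and 6 to rank 0).
for count in range(1, 7):
    print(count, [r for r in range(3) if keeps_particle(count, 3, r)])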
