
Commit 996b112
Merge remote-tracking branch 'upstream/dev' into dev
ethan-lame committed Jan 15, 2025
2 parents 18ce091 + cf92d62 commit 996b112
Showing 24 changed files with 477 additions and 83 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs_test.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
@@ -19,7 +19,7 @@ permissions:

 jobs:
   deploy:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     permissions:
       id-token: write
     steps:
2 changes: 1 addition & 1 deletion .github/workflows/regression_test-numba_cpu.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/regression_test-numba_cpu_mpi.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/regression_test-python.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/regression_test-python_mpi.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"] #, "macos-latest"
+        os: ["ubuntu-22.04"] #, "macos-latest"
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/unit_test.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up python 3.11
2 changes: 1 addition & 1 deletion .github/workflows/verification_man_mpi_numba.yml
@@ -8,7 +8,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-latest"] #, "macos-latest"
+        os: ["ubuntu-22.04"] #, "macos-latest"
     steps:
       - uses: actions/checkout@v3
      - name: Set up python 3.11
2 changes: 1 addition & 1 deletion mcdc/global_.py
@@ -84,10 +84,10 @@ def reset(self):
             "ww_mesh": make_card_mesh(),
             "domain_decomposition": False,
             "dd_idx": 0,
+            "dd_local_rank": 0,
             "dd_mesh": make_card_mesh(),
             "dd_exchange_rate": 0,
             "dd_exchange_rate_padding": 0,
-            "dd_repro": False,
             "dd_work_ratio": np.array([1]),
             "weight_roulette": False,
             "wr_threshold": 0.0,
9 changes: 2 additions & 7 deletions mcdc/input_.py
@@ -1320,7 +1320,6 @@ def domain_decomposition(
     exchange_rate=100000,
     exchange_rate_padding=None,
     work_ratio=None,
-    repro=True,
 ):
     """
     Activate domain decomposition.
@@ -1347,7 +1346,6 @@ def domain_decomposition(
     card["domain_decomposition"] = True
     card["dd_exchange_rate"] = int(exchange_rate)
     card["dd_exchange_rate_padding"] = exchange_rate_padding
-    card["dd_repro"] = repro
     dom_num = 1
     # Set mesh
     if x is not None:
@@ -1359,11 +1357,8 @@ def domain_decomposition(
     if z is not None:
         card["dd_mesh"]["z"] = z
         dom_num += len(z)
-    # Set work ratio
-    if work_ratio is None:
-        card["dd_work_ratio"] = None
-    elif work_ratio is not None:
-        card["dd_work_ratio"] = work_ratio
+
+    card["dd_work_ratio"] = work_ratio
     card["dd_idx"] = 0
     card["dd_xp_neigh"] = []
     card["dd_xn_neigh"] = []
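With the repro flag removed, work_ratio now passes straight through to the card (a None stays None instead of branching). A minimal usage sketch of the updated signature; this is an illustration, not code from this commit, and the mesh boundaries and rank counts are made up:

    import numpy as np
    import mcdc

    # Two subdomains split at x = 5.0, with two MPI ranks assigned to
    # each subdomain (so the job would be launched with 4 ranks).
    mcdc.domain_decomposition(
        x=np.array([0.0, 5.0, 10.0]),
        work_ratio=[2, 2],
    )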
96 changes: 90 additions & 6 deletions mcdc/kernel.py
@@ -702,12 +702,8 @@ def source_particle_dd(seed, mcdc):
 def distribute_work_dd(N, mcdc, precursor=False):
     # Total # of work
     work_size_total = N
-
-    if not mcdc["technique"]["dd_repro"]:
-        work_size, work_start = domain_work(mcdc, mcdc["dd_idx"], N)
-    else:
-        work_start = 0
-        work_size = work_size_total
+    work_start = 0
+    work_size = work_size_total

     if not precursor:
         mcdc["mpi_work_start"] = work_start
@@ -2145,6 +2141,38 @@ def calculate_distance_in_coarse_bin(start, end, distance, center, cs_bin_size):
     return distance_inside


+@njit
+def dd_reduce(data, mcdc):
+    tally_bin = data[TALLY]
+
+    # find number of subdomains
+    d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
+    d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
+    d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1
+
+    with objmode():
+        # assign processors to their subdomain group
+        i = 0
+        for n in range(d_Nx * d_Ny * d_Nz):
+            dd_group = []
+            for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
+                dd_group.append(i)
+                i += 1
+            # create MPI Comm group out of subdomain processors
+            dd_group = MPI.COMM_WORLD.group.Incl(dd_group)
+            dd_comm = MPI.COMM_WORLD.Create(dd_group)
+            # MPI Reduce on subdomain processors
+            buff = np.zeros_like(tally_bin[TALLY_SCORE])
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
+            if mcdc["dd_idx"] == n:
+                tally_bin[TALLY_SCORE][:] = buff
+            # free comm group
+            dd_group.Free()
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Free()
+
+
 @njit
 def tally_reduce(data, mcdc):
     tally_bin = data[TALLY]
@@ -2162,6 +2190,16 @@ def tally_reduce(data, mcdc):
         MPI.COMM_WORLD.Reduce(tally_bin[TALLY_SCORE], buff, MPI.SUM, 0)
         tally_bin[TALLY_SCORE][:] = buff

+    else:
+        # find number of subdomains
+        N_dd = 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
+        # DD Reduce if multiple processors per subdomain
+        if N_dd != mcdc["mpi_size"]:
+            dd_reduce(data, mcdc)
+

 @njit
 def tally_accumulate(data, mcdc):
@@ -2178,6 +2216,42 @@ def tally_accumulate(data, mcdc):
         tally_bin[TALLY_SCORE, i] = 0.0


+@njit
+def dd_closeout(data, mcdc):
+    tally_bin = data[TALLY]
+
+    # find number of subdomains
+    d_Nx = mcdc["technique"]["dd_mesh"]["x"].size - 1
+    d_Ny = mcdc["technique"]["dd_mesh"]["y"].size - 1
+    d_Nz = mcdc["technique"]["dd_mesh"]["z"].size - 1
+
+    with objmode():
+        # assign processors to their subdomain group
+        i = 0
+        for n in range(d_Nx * d_Ny * d_Nz):
+            dd_ranks = []
+            for r in range(int(mcdc["technique"]["dd_work_ratio"][n])):
+                dd_ranks.append(i)
+                i += 1
+            # create MPI Comm group out of subdomain processors
+            dd_group = MPI.COMM_WORLD.group.Incl(dd_ranks)
+            dd_comm = MPI.COMM_WORLD.Create(dd_group)
+            # MPI Reduce on subdomain processors
+            buff = np.zeros_like(tally_bin[TALLY_SUM])
+            buff_sq = np.zeros_like(tally_bin[TALLY_SUM_SQ])
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Reduce(tally_bin[TALLY_SUM], buff, MPI.SUM, 0)
+                dd_comm.Reduce(tally_bin[TALLY_SUM_SQ], buff_sq, MPI.SUM, 0)
+            if mcdc["dd_idx"] == n:
+                tally_bin[TALLY_SUM] = buff
+                tally_bin[TALLY_SUM_SQ] = buff_sq
+
+            # free comm group
+            dd_group.Free()
+            if MPI.COMM_NULL != dd_comm:
+                dd_comm.Free()
+
+
 @njit
 def tally_closeout(data, mcdc):
     tally = data[TALLY]
@@ -2199,6 +2273,16 @@ def tally_closeout(data, mcdc):
         tally[TALLY_SUM] = buff
         tally[TALLY_SUM_SQ] = buff_sq

+    else:
+        # find number of subdomains
+        N_dd = 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["x"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["y"].size - 1
+        N_dd *= mcdc["technique"]["dd_mesh"]["z"].size - 1
+        # DD Reduce if multiple processors per subdomain
+        if N_dd != mcdc["mpi_size"]:
+            dd_closeout(data, mcdc)
+        # tally[TALLY_SUM_SQ] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
     # Calculate and store statistics
     # sum --> mean
     # sum_sq --> standard deviation
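The new dd_reduce and dd_closeout helpers share one pattern: walk the subdomains, group the next dd_work_ratio[n] consecutive world ranks into an MPI group for subdomain n, reduce tallies within that group, and free it. They are invoked only when ranks outnumber subdomains (N_dd != mcdc["mpi_size"]). A self-contained sketch of that pattern under assumed toy inputs (the variable names and the two-subdomain layout below are illustrative, not from this commit):

    # Run with 4 ranks, e.g.: mpiexec -n 4 python sketch.py
    import numpy as np
    from mpi4py import MPI

    work_ratio = [2, 2]    # illustrative: 2 ranks for each of 2 subdomains
    my_score = np.ones(3)  # stand-in for this rank's local tally scores

    i = 0
    for n, n_ranks in enumerate(work_ratio):
        ranks = list(range(i, i + n_ranks))  # consecutive ranks own subdomain n
        i += n_ranks
        # every rank takes part: Create() is collective over COMM_WORLD
        group = MPI.COMM_WORLD.group.Incl(ranks)
        comm = MPI.COMM_WORLD.Create(group)
        buff = np.zeros_like(my_score)
        if comm != MPI.COMM_NULL:
            # sum scores onto the subdomain's root (its lowest world rank)
            comm.Reduce(my_score, buff, MPI.SUM, 0)
            comm.Free()
        group.Free()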
29 changes: 29 additions & 0 deletions mcdc/loop.py
@@ -200,8 +200,36 @@ def generate_source_particle(work_start, idx_work, seed, prog):
     P = P_arr[0]

     # Check if it is beyond current census index
+    hit_census = False
     idx_census = mcdc["idx_census"]
     if P["t"] > mcdc["setting"]["census_time"][idx_census]:
+        hit_census = True
+
+    # Check if particle is in the domain (if decomposed)
+    if mcdc["technique"]["domain_decomposition"]:
+        if not kernel.particle_in_domain(P_arr, mcdc):
+            return
+
+        # Also check if it belongs to the current rank
+        mcdc["dd_N_local_source"] += 1
+        if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 1:
+            if (
+                mcdc["dd_N_local_source"]
+                % mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
+                != mcdc["dd_local_rank"]
+            ):
+                return
+
+    # Put into the bank
+    if hit_census:
+        # TODO: Need a special bank for source particles in the later census indices.
+        #       This is needed so that those particles are not prematurely
+        #       population controlled.
+        adapt.add_census(P_arr, prog)
+    else:
+        adapt.add_active(P_arr, prog)
+
+    """
     if mcdc["technique"]["domain_decomposition"]:
         if mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]] > 0:
             P["w"] /= mcdc["technique"]["dd_work_ratio"][mcdc["dd_idx"]]
@@ -222,6 +250,7 @@ def generate_source_particle(work_start, idx_work, seed, prog):
     else:
         kernel.recordlike_to_particle(P_new_arr, P_arr)
         adapt.add_active(P_new_arr, prog)
+    """


 @njit
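The new ownership check in generate_source_particle lets every rank of a subdomain sample the same source particles, then keeps each particle on exactly one rank: a running counter (dd_N_local_source) modulo the subdomain's rank count must match dd_local_rank. A toy illustration of that rule in plain Python (the function and its names are illustrative, not part of the codebase):

    def owned_particles(n_particles, n_ranks, local_rank):
        # keep a particle only when the running counter, modulo the
        # number of ranks in this subdomain, lands on our local rank
        owned = []
        counter = 0
        for p in range(n_particles):
            counter += 1
            if counter % n_ranks == local_rank:
                owned.append(p)
        return owned

    # 5 in-domain particles, 2 ranks per subdomain:
    print(owned_particles(5, 2, 0))  # [1, 3]
    print(owned_particles(5, 2, 1))  # [0, 2, 4]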
