diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index a6a51b65f0..65d93d537a 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -1,4 +1,4 @@
#!/bin/bash
. ./mfc.sh load -c f -m g
-./mfc.sh build -j 8 --gpu
+./mfc.sh test --dry-run -j 8 --gpu
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index 5cdc57e78c..e89af47214 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -5,7 +5,7 @@ if [ "$job_device" == "gpu" ]; then
build_opts="--gpu"
fi
-./mfc.sh build -j 8 $build_opts
+./mfc.sh test --dry-run -j 8 $build_opts
n_test_threads=8
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1be80f5c79..b312197e03 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -84,7 +84,7 @@ jobs:
- name: Build
run: |
if [ '${{ matrix.intel }}' == 'true' ]; then . /opt/intel/oneapi/setvars.sh; fi
- /bin/bash mfc.sh build -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }}
+ /bin/bash mfc.sh test --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }}
- name: Test
run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index de84d7915d..8a8e19120e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -335,7 +335,7 @@ macro(HANDLE_SOURCES target useCommon)
add_custom_command(
OUTPUT ${f90}
- COMMAND ${FYPP_EXE} -m re
+ COMMAND ${FYPP_EXE} -m re -m itertools
-I "${CMAKE_BINARY_DIR}/include/${target}"
-I "${${target}_DIR}/include"
-I "${common_DIR}/include"
diff --git a/docs/documentation/case.md b/docs/documentation/case.md
index d9461453e4..86f9afb460 100644
--- a/docs/documentation/case.md
+++ b/docs/documentation/case.md
@@ -337,8 +337,8 @@ Details of implementation of viscosity in MFC can be found in [Coralic (2015)](r
| Parameter | Type | Description |
| ---: | :----: | :--- |
| `bc_[x,y,z]%%beg[end]` | Integer | Beginning [ending] boundary condition in the $[x,y,z]$-direction (negative integer, see table [Boundary Conditions](#boundary-conditions)) |
-| `bc_[x,y,z]%%vb[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%beg` |
-| `bc_[x,y,z]%%ve[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%end` |
+| `bc_[x,y,z]%%vel_beg[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%beg` |
+| `bc_[x,y,z]%%vel_end[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%end` |
| `model_eqns` | Integer | Multicomponent model: [1] $\Gamma/\Pi_\infty$; [2] 5-equation; [3] 6-equation; [4] 4-equation |
| `alt_soundspeed` * | Logical | Alternate sound speed and $K \nabla \cdot u$ for 5-equation model |
| `adv_n` | Logical | Solving directly for the number density (in the method of classes) and compute void fraction from the number density |
@@ -463,7 +463,7 @@ The value of `dt` needs to be sufficiently small to satisfy the Courant-Friedric
To newly start the simulation, set `t_step_start = 0`.
To restart the simulation from $k$-th time step, set `t_step_start = k`; see [Restarting Cases](running.md#restarting-cases).
-##### Adaptive Time-Stepping
+##### Adaptive Time-Stepping (optional)
- `cfl_adap_dt` enables adaptive time stepping with a constant CFL when true
@@ -480,6 +480,36 @@ To restart the simulation from $k$-th time step, set `t_step_start = k`; see [Re
To newly start the simulation, set `n_start = 0`.
To restart the simulation from $k$-th time step, see [Restarting Cases](running.md#restarting-cases).
+##### Boundary Condition Patches (optional and experimental)
+
+> [!WARNING]
+> This feature is currently experimental and may not produce physically correct
+> results when combined with certain other features.
+
+Boundary condition patches allow you to define boundary conditions with more granularity
+than `bc_[x,y,z]`. When using this feature, any point along the edge of the domain can
+be assigned its own boundary condition type. Since the boundaries of a 3D domain are
+2D surfaces, the concept of patches is re-introduced.
+
+Boundary conditions are applied in layers using the [Painter's algorithm](https://en.wikipedia.org/wiki/Painter%27s_algorithm):
+patches with higher indices take priority, and the lowest priority is given to
+`bc_[x,y,z]`. This feature is opt-in and is enabled by setting `num_bc_patches` to a
+positive value; a short example follows the parameter table below.
+
+| Parameter | Type | Description |
+| ---: | :----: | :--- |
+| `num_bc_patches` | Integer | Number of boundary condition patches (default 0) |
+| `%%type` * | Integer | Boundary condition type (negative integer, see table [Boundary Conditions](#boundary-conditions)) |
+| `%%dir` * | Integer | Coordinate direction of the patched boundary: [1] x; [2] y; [3] z |
+| `%%loc` * | Integer | End of the `%%dir` axis at which the patch is applied: [-1] beginning; [+1] end |
+| `%%geometry` * | Integer | The geometry of the boundary condition patch. See table [Boundary Condition Patch Geometry Types](#boundary-condition-patch-geometry-types) |
+| `%%vel(i)` * | Real | Meaning depends on the boundary condition |
+| `%%centroid(i)` * | Real | Meaning depends on the patch geometry. Index $i = \text{dir}$ is always ignored |
+| `%%length(i)` * | Real | Meaning depends on the patch geometry. Index $i = \text{dir}$ is always ignored |
+| `%%radius` * | Real | Meaning depends on the patch geometry |
+
+*: These parameters should be prepended with `patch_bc(j)%%`, where $j$ is the boundary condition patch index.
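+
+As a minimal sketch (see also `examples/hypotests/case.py`), the following case-dictionary
+excerpt requests a single velocity-inlet patch covering part of the boundary at the
+beginning of the $x$-axis of a 2D case; the values shown are illustrative only:
+
+```python
+{
+    # Base boundary conditions (lowest-priority layer)
+    'bc_x%beg': -3, 'bc_x%end': -3,
+    'bc_y%beg': -3, 'bc_y%end': -3,
+    # One boundary condition patch, painted on top of bc_x%beg
+    'num_bc_patches': 1,
+    'patch_bc(1)%type': -17,         # Velocity inlet
+    'patch_bc(1)%dir': 1,            # x-axis
+    'patch_bc(1)%loc': -1,           # beginning of the x-axis
+    'patch_bc(1)%geometry': 1,       # Cuboid
+    'patch_bc(1)%centroid(2)': 0.5,  # index 1 (= dir) is ignored
+    'patch_bc(1)%length(2)': 0.26,
+    'patch_bc(1)%vel(1)': 10,        # inlet velocity
+    'patch_bc(1)%vel(2)': 0,
+}
+```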
+
### 7. Formatted Output
| Parameter | Type | Description |
@@ -862,6 +892,13 @@ This includes types exclusive to one-, two-, and three-dimensional problems.
The patch type number (`#`) corresponds to the input value in `input.py` labeled `patch_icpp(j)%%geometry` where $j$ is the patch index.
Each patch requires a different set of parameters, which are also listed in this table.
+### Boundary Condition Patch Geometry Types
+
+| # | Name | Dim. | Requirements |
+| ---: | :----: | :--- | :--- |
+| 1 | Cuboid | 1 & 2 | `%%centroid(1:3)` and `%%length(1:3)` |
+| 2 | Spheroid | 1 & 2 | `%%centroid(1:3)` and `%%radius` |
+
### Immersed Boundary Patch Types
| # | Name | Dim. |
diff --git a/examples/1D_inert_shocktube/export.py b/examples/1D_inert_shocktube/export.py
new file mode 100644
index 0000000000..e676b6cedd
--- /dev/null
+++ b/examples/1D_inert_shocktube/export.py
@@ -0,0 +1,30 @@
+import csv
+import numpy as np
+import statistics
+from tqdm import tqdm
+
+import mfc.viz
+from case import dt, sol_L as sol
+
+
+case = mfc.viz.Case(".", dt)
+
+for name in tqdm(sol.species_names, desc="Loading Variables"):
+ case.load_variable(f"Y_{name}", f"prim.{5 + sol.species_index(name)}")
+case.load_variable("rho", "prim.1")
+case.load_variable("u", "prim.2")
+case.load_variable("p", "prim.3")
+case.load_variable("T", "prim.15")
+
+steps = case.get_timesteps()
+
+for step in [min(steps), max(steps)]:
+ t = step * dt
+
+ with open(f"mfc-{step}.csv", "w") as f:
+ writer = csv.writer(f)
+ keys = ['x'] + list(set(case.get_data()[0].keys()) - set(["x"]))
+ writer.writerow(keys)
+ for ix, x in enumerate(sorted(case.get_coords()[0])):
+ row = [case.get_data()[step][key][ix] for key in keys]
+ writer.writerow(row)
diff --git a/examples/1D_reactive_shocktube/export.py b/examples/1D_reactive_shocktube/export.py
new file mode 100644
index 0000000000..e676b6cedd
--- /dev/null
+++ b/examples/1D_reactive_shocktube/export.py
@@ -0,0 +1,30 @@
+import csv
+import numpy as np
+import statistics
+from tqdm import tqdm
+
+import mfc.viz
+from case import dt, sol_L as sol
+
+
+case = mfc.viz.Case(".", dt)
+
+for name in tqdm(sol.species_names, desc="Loading Variables"):
+ case.load_variable(f"Y_{name}", f"prim.{5 + sol.species_index(name)}")
+case.load_variable("rho", "prim.1")
+case.load_variable("u", "prim.2")
+case.load_variable("p", "prim.3")
+case.load_variable("T", "prim.15")
+
+steps = case.get_timesteps()
+
+for step in [min(steps), max(steps)]:
+ t = step * dt
+
+ with open(f"mfc-{step}.csv", "w") as f:
+ writer = csv.writer(f)
+ keys = ['x'] + list(set(case.get_data()[0].keys()) - set(["x"]))
+ writer.writerow(keys)
+ for ix, x in enumerate(sorted(case.get_coords()[0])):
+ row = [case.get_data()[step][key][ix] for key in keys]
+ writer.writerow(row)
diff --git a/examples/hypotests/case.py b/examples/hypotests/case.py
new file mode 100644
index 0000000000..badaa39155
--- /dev/null
+++ b/examples/hypotests/case.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+import math
+import json
+
+# Numerical setup
+Nx = 201 # Number of grid points in x
+Ny = 201 # Number of grid points in y
+dx = 1./(1.*(Nx+1)) # Grid spacing in x
+dy = 1./(1.*(Ny+1)) # Grid spacing in y
+
+Tend = 64E-06 # End time
+Nt = 2000 # Number of time steps
+mydt = Tend/(1.*Nt) # Time step size
+
+# Configuring case dictionary
+print(json.dumps({
+ # Logistics ================================================
+ 'run_time_info' : 'F',
+ # ==========================================================
+
+ # Computational Domain Parameters ==========================
+ 'x_domain%beg' : 0.E+00, # x start
+ 'x_domain%end' : 2.E+00, # x end
+ 'y_domain%beg' : 0.E+00, # y start
+ 'y_domain%end' : 1.E+00, # y end
+ 'm' : Nx, # Number of grid points in x direction
+ 'n' : Ny, # Number of grid points in y direction
+ 'p' : 0, # Number of grid points in z (for 3D, change this)
+ 'dt' : 1e-6, # Time step size
+ 't_step_start' : 0, # Start time
+ 't_step_stop' : Nt, # End time
+ 't_step_save' : 500, # Save frequency
+ # ==========================================================
+
+ # Simulation Algorithm Parameters ==========================
+    'num_patches' : 1, # One patch (background flow)
+    'model_eqns' : 2, # 5-equation model
+ 'alt_soundspeed' : 'F',
+ 'num_fluids' : 2,
+ 'low_Mach' : 0,
+ 'mpp_lim' : 'F',
+ # ' mixture_err' : 'F',
+ 'time_stepper' : 3,
+ 'weno_order' : 5,
+ 'weno_eps' : 1.E-16,
+ 'weno_Re_flux' : 'F',
+ 'weno_avg' : 'F',
+ 'mapped_weno' : 'F',
+ 'null_weights' : 'F',
+ 'mp_weno' : 'F',
+ 'riemann_solver' : 1,
+ 'wave_speeds' : 1,
+ 'avg_state' : 2,
+ 'bc_x%beg' : -3,
+ 'bc_x%end' : -3,
+ 'bc_y%beg' : -3, # Boundary conditions for y direction
+ 'bc_y%end' : -3,
+ 'num_bc_patches' : 1,
+ 'patch_bc(1)%type' : -17,
+ 'patch_bc(1)%dir' : 1,
+ 'patch_bc(1)%loc' : -1,
+ 'patch_bc(1)%geometry' : 1,
+ 'patch_bc(1)%centroid(1)' : 0,
+ 'patch_bc(1)%centroid(2)' : 0.5,
+ 'patch_bc(1)%length(2)' : 0.26,
+ 'patch_bc(1)%vel(1)' : 10,
+ 'patch_bc(1)%vel(2)' : 0,
+ # ==========================================================
+
+ # Turning on IB ================================
+ 'ib' : 'T',
+ 'num_ibs' : 2,
+
+ # ==========================================================
+
+ # Formatted Database Files Structure Parameters ============
+ 'format' : 1,
+ 'precision' : 2,
+ 'prim_vars_wrt' :'T',
+ 'parallel_io' :'T',
+ # ==========================================================
+
+ # Patch 1 (background flow) ===================
+ 'patch_icpp(1)%geometry' : 3, # 2D geometry
+ 'patch_icpp(1)%x_centroid' : 1.0, # x-center
+ 'patch_icpp(1)%y_centroid' : 0.5, # y-center
+ 'patch_icpp(1)%length_x' : 2.0, # x-length
+ 'patch_icpp(1)%length_y' : 1.0, # y-length
+ 'patch_icpp(1)%vel(1)' : 0.0,
+ 'patch_icpp(1)%vel(2)' : 0.0, # y-velocity '100*sin(3*x*pi)'
+ 'patch_icpp(1)%pres' : 1.E5, # Pressure
+ 'patch_icpp(1)%alpha_rho(1)' : 1000, # Density
+ 'patch_icpp(1)%alpha_rho(2)' : 0.,
+ 'patch_icpp(1)%alpha(1)' : 1,
+ 'patch_icpp(1)%alpha(2)' : 0.,
+ 'patch_icpp(1)%tau_e(1)' : 0.0,
+
+
+ # ==========================================================
+
+    # Immersed Boundary Patch 1 (hypo material) ================
+ 'patch_ib(1)%geometry' : 3, # 2D geometry
+ # 'patch_ib(1)%hcid' : 201,
+ 'patch_ib(1)%x_centroid' : 0.5, # x-center
+ 'patch_ib(1)%y_centroid' : 0.65, # y-center
+ 'patch_ib(1)%length_x' : 1.0, # x-length
+ 'patch_ib(1)%length_y' : 0.04, # y-length
+ 'patch_ib(1)%slip' : 'T',
+
+ # ==========================================================
+
+    # Immersed Boundary Patch 2 (hypo material) ================
+ 'patch_ib(2)%geometry' : 3, # 2D geometry
+ # 'patch_ib(1)%hcid' : 201,
+ 'patch_ib(2)%x_centroid' : 0.5, # x-center
+ 'patch_ib(2)%y_centroid' : 0.35, # y-center
+ 'patch_ib(2)%length_x' : 1.0, # x-length
+ 'patch_ib(2)%length_y' : 0.04, # y-length
+ 'patch_ib(2)%slip' : 'T',
+
+ # Fluids Physical Parameters ===============================
+ 'fluid_pp(1)%gamma' : 1.E+00/(6.12E+00-1.E+00),
+ 'fluid_pp(1)%pi_inf' : 6.12E+00*3.43E+08/(6.12E+00 - 1.E+00),
+ # 'fluid_pp(1)%G' : 0,
+ 'fluid_pp(2)%gamma' : 1.E+00/(1.3E+00-1.E+00),
+ 'fluid_pp(2)%pi_inf' : 1.3E+00*2.E+08/(1.3E+00 - 1.E+00),
+ # 'fluid_pp(2)%G' : 2.7E+05/(2.E+00*(1.E+00 + 0.4E+00)),
+ 'fluid_pp(2)%G' : 1.E7,
+ # ==========================================================
+}))
diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp
index c1f447a260..055ba3a5f9 100644
--- a/src/common/include/macros.fpp
+++ b/src/common/include/macros.fpp
@@ -102,3 +102,161 @@
//${message or '"No error description."'}$)
end if
#:enddef
+
+#:def MAKE_LOOPS(impl, loops, gpu = None)
+ #:if gpu != False
+ !$acc parallel loop collapse(${len(loops)}$) gang vector default(present)
+ #:endif
+ #:for index, lbound, hbound in (loops or [])
+ do ${index}$ = ${lbound}$, ${hbound}$
+ #:endfor
+
+ $:impl
+
+ #:for i in range(len(loops or []))
+ end do
+ #:endfor
+ #:enddef
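+#! Illustrative usage (not part of the build): a block such as
+#!     #:block MAKE_LOOPS(loops=[("k", 0, "n"), ("j", 1, "m")])
+#!         a(j, k) = 0d0
+#!     #:endblock
+#! expands to a "do k = 0, n" / "do j = 1, m" nest around the body, preceded by an
+#! "!$acc parallel loop collapse(2) gang vector default(present)" directive unless
+#! gpu=False is passed.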
+
+ #:def ITERATE_OVER_BUFFER_REGION_SIDED(impl, dir = None, loc = None, thickness = None, gpu = None, outer_loops = None, inner_loops = None, pack_v_size = None)
+ #:set thickness = thickness or 'buff_size'
+ #:set outer_loops = outer_loops or []
+ #:set inner_loops = inner_loops or []
+ #:set pack_v_size = pack_v_size or 'v_size'
+
+ if (${dir}$ <= num_dims) then
+
+ if ((${dir}$) == +1 .and. (${loc}$) == -1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("l", 0, "p"), ("k", 0, "n"), ("j", 1, thickness)] + inner_loops, gpu=gpu)
+ x = -j; y = k; z = l ! Regular
+ sx = j - 1; sy = k; sz = l ! Symmetry
+ px = m - (j - 1); py = k; pz = l ! Periodic
+ ex = 0; ey = k; ez = l ! Extrapolation
+ locx = -1; locy = 0; locz = 0
+
+ exlhs = ex; eylhs = ey; ezlhs = ez
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((j - 1) + ${thickness}$*(k + (n + 1)*l))
+#endif
+
+ $:impl
+ #:endblock
+
+ else if ((${dir}$) == +1 .and. (${loc}$) == +1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("l", 0, "p"), ("k", 0, "n"), ("j", 1, thickness)] + inner_loops, gpu=gpu)
+ x = m + j; y = k; z = l ! Regular
+ sx = m - (j - 1); sy = k; sz = l ! Symmetry
+ px = j - 1; py = k; pz = l ! Periodic
+ ex = m; ey = k; ez = l ! Extrapolation
+ locx = +1; locy = 0; locz = 0
+
+ exlhs = 0; eylhs = ey; ezlhs = ez
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((j - 1) + ${thickness}$*(k + (n + 1)*l))
+#endif
+
+ $:impl
+ #:endblock
+
+ else if ((${dir}$) == +2 .and. (${loc}$) == -1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("k", 0, "p"), ("j", 1, thickness), ("l", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu)
+ x = l; y = -j; z = k ! Regular
+ sx = l; sy = j - 1; sz = k ! Symmetry
+ px = l; py = n - (j - 1); pz = k ! Periodic
+ ex = l; ey = 0; ez = k ! Extrapolation
+ locx = 0; locy = -1; locz = 0
+
+ exlhs = ex; eylhs = ey; ezlhs = ez
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((l + ${thickness}$) + (m + 2*${thickness}$+1)*(j - 1 + ${thickness}$*k))
+#endif
+
+ $:impl
+ #:endblock
+
+ else if ((${dir}$) == +2 .and. (${loc}$) == +1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("k", 0, "p"), ("j", 1, thickness), ("l", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu)
+ x = l; y = n + j; z = k ! Regular
+ sx = l; sy = n - (j - 1); sz = k ! Symmetry
+ px = l; py = j - 1; pz = k ! Periodic
+ ex = l; ey = n; ez = k ! Extrapolation
+ locx = 0; locy = +1; locz = 0
+
+ exlhs = ex; eylhs = 0; ezlhs = ez
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((l + ${thickness}$) + (m + 2*${thickness}$+1)*(j - 1 + ${thickness}$*k))
+#endif
+
+ $:impl
+ #:endblock
+
+ else if ((${dir}$) == +3 .and. (${loc}$) == -1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("j", 1, thickness), ("l", f"-({thickness})", f"n + ({thickness})"), ("k", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu)
+ x = k; y = l; z = -j ! Regular
+ sx = k; sy = l; sz = j - 1 ! Symmetry
+ px = k; py = l; pz = p - (j - 1) ! Periodic
+ ex = k; ey = l; ez = 0 ! Extrapolation
+ locx = 0; locy = 0; locz = -1
+
+ exlhs = ex; eylhs = ey; ezlhs = ez
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((k + ${thickness}$) + (m + 2*${thickness}$+1)*((l + ${thickness}$) + (n + 2*${thickness}$+1)*(j - 1)))
+#endif
+
+ $:impl
+ #:endblock
+
+ else if ((${dir}$) == +3 .and. (${loc}$) == +1) then
+
+ #:block MAKE_LOOPS(loops = outer_loops + [("j", 1, thickness), ("l", f"-({thickness})", f"n + ({thickness})"), ("k", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu)
+ x = k; y = l; z = p + j ! Regular
+ sx = k; sy = l; sz = p - (j - 1) ! Symmetry
+ px = k; py = l; pz = j - 1 ! Periodic
+ ex = k; ey = l; ez = p ! Extrapolation
+ locx = 0; locy = 0; locz = +1
+
+ exlhs = ex; eylhs = ey; ezlhs = 0
+ exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p)
+
+#ifndef MFC_PRE_PROCESS
+ pack_idr = (i - 1) + ${pack_v_size}$*((k + ${thickness}$) + (m + 2*${thickness}$+1)*((l + ${thickness}$) + (n + 2*${thickness}$+1)*(j - 1)))
+#endif
+
+ $:impl
+ #:endblock
+
+ else
+
+ stop "Invalid boundary condition direction or location."
+
+ end if
+
+ end if
+ #:enddef ITERATE_OVER_BUFFER_REGION_SIDED
+
+ #:def BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+ integer :: i, j, k, l, q
+
+ integer :: x, y, z
+ integer :: sx, sy, sz
+ integer :: px, py, pz
+ integer :: ex, ey, ez
+ integer :: locx, locy, locz
+ integer :: exlhs, eylhs, ezlhs
+ integer :: pack_idr
+ #:enddef
diff --git a/src/common/m_boundary_conditions_common.fpp b/src/common/m_boundary_conditions_common.fpp
new file mode 100644
index 0000000000..bd17a661ba
--- /dev/null
+++ b/src/common/m_boundary_conditions_common.fpp
@@ -0,0 +1,484 @@
+#:include 'macros.fpp'
+
+module m_boundary_conditions_common
+
+ use m_global_parameters
+
+ use m_mpi_proxy
+
+ use m_helper
+
+ use m_compile_specific
+
+ implicit none
+
+ private; public :: s_initialize_boundary_conditions_module, &
+ s_generate_boundary_condition_patch_buffers, &
+ bc_id_sfs, bc_id_has_bc, &
+ s_read_boundary_condition_files, &
+ s_write_boundary_condition_files
+
+    ! Boundary condition structure. bc_id_sfs(dir, loc)%sf(x, y, z) holds the
+    ! boundary condition information needed at the point (x, y, z) on the domain
+    ! surface on side loc of direction dir, so that said condition can be applied.
+ type(t_bc_id_sf), dimension(1:3, -1:1) :: bc_id_sfs
+
+#ifdef MFC_MPI
+ integer :: mpi_bc_id_type
+ integer, dimension(1:3, -1:1) :: mpi_bc_sf_type
+#endif
+
+    ! Optimization structure. Holds whether a boundary condition can be found
+    ! on a surface. bc_id_has_bc(dir, loc, 0) is true if MPI communication is
+    ! required for at least one point on the surface on side loc of direction dir.
+    ! In general, bc_id_has_bc(dir, loc, type) is true if a boundary condition
+    ! with that type can be found somewhere on that surface.
+ logical, dimension(1:3, -1:1, -num_bcs_max:0) :: bc_id_has_bc
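+    ! For example (illustrative): bc_id_has_bc(1, -1, -2) being true means a
+    ! symmetry condition (-2) exists somewhere on the face at the beginning of
+    ! the x-direction, while bc_id_has_bc(1, -1, 0) being true means at least
+    ! one point on that face is filled from a neighboring rank via MPI.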
+
+!$acc declare create(bc_id_sfs)
+
+#ifndef MFC_PRE_PROCESS
+ public :: s_populate_prim_buffers, s_populate_cons_buffers
+#endif
+
+contains
+
+ subroutine s_initialize_boundary_conditions_module()
+
+ integer :: iter_loc
+
+ do iter_loc = -1, 2, 2
+ @:ALLOCATE(bc_id_sfs(1, iter_loc)%sf(0:0, 0:n, 0:p))
+ if (num_dims > 1) then
+ @:ALLOCATE(bc_id_sfs(2, iter_loc)%sf(0:m, 0:0, 0:p))
+
+ if (num_dims > 2) then
+ @:ALLOCATE(bc_id_sfs(3, iter_loc)%sf(0:m, 0:n, 0:0))
+ end if
+ end if
+ end do
+
+#ifdef MFC_MPI
+ call s_create_mpi_types()
+#endif
+
+ end subroutine s_initialize_boundary_conditions_module
+
+#ifdef MFC_MPI
+ subroutine s_create_mpi_types()
+
+ use mpi
+
+ integer :: blocklengths(2) = (/1, 3/)
+ integer(KIND=MPI_ADDRESS_KIND) :: displacements(2) = (/0, 8/) ! Offset between fields in memory
+ integer :: types(2) = (/MPI_INTEGER, MPI_DOUBLE_PRECISION/)
+ integer, dimension(1:3) :: sf_extents_loc, sf_start_idx
+ integer :: ierr
+
+ integer :: iter_dir, iter_loc
+
+ call MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_bc_id_type, ierr)
+ call MPI_Type_commit(mpi_bc_id_type, ierr)
+
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+
+ sf_start_idx = (/0, 0, 0/)
+ sf_extents_loc = shape(bc_id_sfs(iter_dir, iter_loc)%sf)
+
+ call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc, sf_extents_loc, sf_start_idx, &
+ MPI_ORDER_FORTRAN, mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), ierr)
+ call MPI_TYPE_COMMIT(mpi_bc_sf_type(iter_dir, iter_loc), ierr)
+
+ end do
+ end do
+
+ end subroutine s_create_mpi_types
+#endif
+
+ subroutine s_write_boundary_condition_files(step_dirpath)
+
+ character(LEN=*), intent(in) :: step_dirpath
+
+ integer :: iter_dir, iter_loc
+ character(len=path_len) :: file_path
+
+ character(len=10) :: status
+
+#ifdef MFC_MPI
+ integer :: ierr
+ integer :: file_id
+ integer :: offset
+ character(len=7) :: proc_rank_str
+#endif
+
+#ifdef MFC_PRE_PROCESS
+ if (old_grid) then
+ status = 'old'
+ else
+ status = 'new'
+ end if
+#else
+ status = 'unknown'
+#endif
+
+ if (parallel_io .eqv. .false.) then
+ file_path = trim(step_dirpath)//'/bc.dat'
+ open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status)
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ write (1) bc_id_sfs(iter_dir, iter_loc)%sf
+ end do
+ end do
+ close (1)
+ else
+#ifdef MFC_MPI
+ write (proc_rank_str, '(I7.7)') proc_rank
+ file_path = trim(step_dirpath)//'/bc_'//trim(proc_rank_str)//'.dat'
+ call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr)
+
+ offset = 0
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), 'native', MPI_INFO_NULL, ierr)
+ call MPI_File_write_all(file_id, bc_id_sfs(iter_dir, iter_loc)%sf, 1, mpi_bc_sf_type(iter_dir, iter_loc), MPI_STATUS_IGNORE, ierr)
+ offset = offset + sizeof(bc_id_sfs(iter_dir, iter_loc)%sf)
+ end do
+ end do
+
+ call MPI_File_close(file_id, ierr)
+#endif
+ end if
+
+ end subroutine s_write_boundary_condition_files
+
+ subroutine s_read_boundary_condition_files(step_dirpath)
+
+ character(LEN=*), intent(in) :: step_dirpath
+
+ integer :: iter_dir, iter_loc
+ logical :: file_exist
+ character(len=path_len) :: file_path
+
+#ifdef MFC_MPI
+ integer :: ierr
+ integer :: file_id
+ integer :: offset
+ character(len=7) :: proc_rank_str
+#endif
+
+ if (parallel_io .eqv. .false.) then
+ file_path = trim(step_dirpath)//'/bc.dat'
+ else
+#ifdef MFC_MPI
+ write (proc_rank_str, '(I7.7)') proc_rank
+ file_path = trim(step_dirpath)//'/bc_'//trim(proc_rank_str)//'.dat'
+#endif
+ end if
+
+ inquire (FILE=trim(file_path), EXIST=file_exist)
+ if (.not. file_exist) then
+ call s_mpi_abort(trim(file_path)//' is missing. Exiting ...')
+ end if
+
+ if (parallel_io .eqv. .false.) then
+ open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown')
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ read (1) bc_id_sfs(iter_dir, iter_loc)%sf
+ end do
+ end do
+ close (1)
+ else
+#ifdef MFC_MPI
+ call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr)
+
+ offset = 0
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), 'native', MPI_INFO_NULL, ierr)
+ call MPI_File_read_all(file_id, bc_id_sfs(iter_dir, iter_loc)%sf, 1, mpi_bc_sf_type(iter_dir, iter_loc), MPI_STATUS_IGNORE, ierr)
+ offset = offset + sizeof(bc_id_sfs(iter_dir, iter_loc)%sf)
+ end do
+ end do
+
+ call MPI_File_close(file_id, ierr)
+#endif
+ end if
+
+ call s_generate_boundary_condition_lookup_buffers()
+
+ end subroutine s_read_boundary_condition_files
+
+ subroutine s_generate_boundary_condition_patch_buffers()
+
+ type(bc_patch_parameters) :: bc
+ integer :: iter_dir, iter_loc
+ real(kind(0d0)) :: radius2
+
+ type(int_bounds_info), dimension(1:3) :: user_input_bcs
+
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+
+ user_input_bcs = [bc_x, bc_y, bc_z]
+
+ do iter_dir = 1, num_dims
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="-1", thickness=1, gpu=False)
+ bc_id_sfs(iter_dir, -1)%sf(exlhs, eylhs, ezlhs)%type = user_input_bcs(iter_dir)%beg
+ bc_id_sfs(iter_dir, -1)%sf(exlhs, eylhs, ezlhs)%vel = user_input_bcs(iter_dir)%vel_beg
+ #:endblock
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="+1", thickness=1, gpu=False)
+ bc_id_sfs(iter_dir, +1)%sf(exlhs, eylhs, ezlhs)%type = user_input_bcs(iter_dir)%end
+ bc_id_sfs(iter_dir, +1)%sf(exlhs, eylhs, ezlhs)%vel = user_input_bcs(iter_dir)%vel_end
+ #:endblock
+ end do
+
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False)
+ do i = 1, num_bc_patches
+ bc = patch_bc(i)
+ if (bc%geometry == 1) then ! Cuboid
+ #:for dir, name in [(1, "x"), (2, "y"), (3, "z")]
+ if (${dir}$ /= bc%dir .and. ${dir}$ <= num_dims) then
+ if (${name}$_cc(e${name}$lhs) >= bc%centroid(${dir}$) + 0.5d0*bc%length(${dir}$)) then
+ cycle
+ end if
+ if (${name}$_cc(e${name}$lhs) <= bc%centroid(${dir}$) - 0.5d0*bc%length(${dir}$)) then
+ cycle
+ end if
+ end if
+ #:endfor
+ elseif (bc%geometry == 2) then ! Spheroid
+ radius2 = 0d0
+ #:for dir, name in [(1, "x"), (2, "y"), (3, "z")]
+ if (${dir}$ /= bc%dir .and. ${dir}$ <= num_dims) then
+ radius2 = radius2 + (${name}$_cc(e${name}$lhs) - bc%centroid(${dir}$))**2
+ end if
+ #:endfor
+ if (radius2 > bc%radius**2) then
+ cycle
+ end if
+ end if
+
+ bc_id_sfs(bc%dir, bc%loc)%sf(exlhs, eylhs, ezlhs)%type = bc%type
+ bc_id_sfs(bc%dir, bc%loc)%sf(exlhs, eylhs, ezlhs)%vel = bc%vel
+ end do
+ #:endblock
+ end do
+ end do
+
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False)
+ if (proc_nums(iter_dir) > 1 .and. ( &
+ bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type == -1 &
+ .or. (iter_loc == -1 .and. proc_coords(iter_dir) > 0) &
+ .or. (iter_loc == +1 .and. proc_coords(iter_dir) < proc_nums(iter_dir) - 1) &
+ )) then
+ bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type = neighbor_procs(iter_dir, iter_loc)
+ end if
+ #:endblock
+
+ !$acc update device(bc_id_sfs(iter_dir, iter_loc)%sf)
+ end do
+ end do
+
+ call s_generate_boundary_condition_lookup_buffers()
+
+ end subroutine s_generate_boundary_condition_patch_buffers
+
+ subroutine s_generate_boundary_condition_lookup_buffers
+
+ integer :: iter_dir, iter_loc
+
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ bc_id_has_bc(iter_dir, iter_loc, :) = .false.
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False)
+ bc_id_has_bc(iter_dir, iter_loc, min(0, bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type)) = .true.
+ #:endblock
+ end do
+ end do
+
+ end subroutine s_generate_boundary_condition_lookup_buffers
+
+#ifndef MFC_PRE_PROCESS
+ !> The purpose of this procedure is to populate the buffers
+ !! of the primitive variables, depending on the selected
+ !! boundary conditions.
+ subroutine s_populate_prim_buffers(q_prim_vf &
+#ifdef MFC_SIMULATION
+ , pb, mv &
+#endif
+ )
+
+ type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+#ifdef MFC_SIMULATION
+ real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
+#endif
+
+ integer :: iter_dir, iter_loc
+
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+
+ do iter_dir = 1, num_dims
+ do iter_loc = -1, 1, 2
+ if (any(bc_id_has_bc(iter_dir, iter_loc, :-1))) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc")
+ select case (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type)
+ case (-13:-3); ! Ghost-cell extrap.
+ !$acc loop seq
+ do i = 1, sys_size
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez)
+ end do
+ case (-2); ! Symmetry
+ !$acc loop seq
+ do i = 1, momxb + iter_dir - 2
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(sx, sy, sz)
+ end do
+
+ q_prim_vf(momxb + iter_dir - 1)%sf(x, y, z) = &
+ -q_prim_vf(momxb + iter_dir - 1)%sf(sx, sy, sz)
+
+ !$acc loop seq
+ do i = momxb + iter_dir, sys_size
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(sx, sy, sz)
+ end do
+ case (-1); ! Periodic
+ !$acc loop seq
+ do i = 1, sys_size
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(px, py, pz)
+ end do
+                        case (-15); ! Slip Wall
+ !$acc loop seq
+ do i = 1, sys_size
+ if (i == momxb + iter_dir - 1) then
+ q_prim_vf(i)%sf(x, y, z) = &
+ -q_prim_vf(i)%sf(sx, sy, sz) + 2d0*bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(iter_dir)
+ else
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez)
+ end if
+ end do
+ case (-16); ! No Slip Wall
+ !$acc loop seq
+ do i = 1, sys_size
+ if (i >= momxb .and. i <= momxe) then
+ q_prim_vf(i)%sf(x, y, z) = &
+ -q_prim_vf(i)%sf(sx, sy, sz) + 2d0*bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(i - momxb + 1)
+ else
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez)
+ end if
+ end do
+ case (-17); ! Velocity Inlet
+ !$acc loop seq
+ do i = 1, sys_size
+ if (i >= momxb .and. i <= momxe) then
+ q_prim_vf(i)%sf(x, y, z) = &
+ bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(i - momxb + 1)
+ else
+ q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez)
+ end if
+ end do
+ case (-14); ! Axis
+ if (z_cc(k) < pi) then
+ !$acc loop seq
+ do i = 1, momxb
+ q_prim_vf(i)%sf(x, y, z) = &
+ q_prim_vf(i)%sf(sx, sy, z + ((p + 1)/2))
+ end do
+
+ q_prim_vf(momxb + 1)%sf(x, y, z) = &
+ -q_prim_vf(momxb + 1)%sf(sx, sy, z + ((p + 1)/2))
+
+ q_prim_vf(momxe)%sf(x, y, z) = &
+ -q_prim_vf(momxe)%sf(sx, sy, z + ((p + 1)/2))
+
+ !$acc loop seq
+ do i = E_idx, sys_size
+ q_prim_vf(i)%sf(x, y, z) = &
+ q_prim_vf(i)%sf(sx, sy, z + ((p + 1)/2))
+ end do
+ else
+ !$acc loop seq
+ do i = 1, momxb
+ q_prim_vf(i)%sf(x, y, z) = &
+ q_prim_vf(i)%sf(sx, sy, z - ((p + 1)/2))
+ end do
+
+ q_prim_vf(momxb + 1)%sf(x, y, z) = &
+ -q_prim_vf(momxb + 1)%sf(sx, sy, z - ((p + 1)/2))
+
+ q_prim_vf(momxe)%sf(x, y, z) = &
+ -q_prim_vf(momxe)%sf(sx, sy, z - ((p + 1)/2))
+
+ !$acc loop seq
+ do i = E_idx, sys_size
+ q_prim_vf(i)%sf(x, y, z) = &
+ q_prim_vf(i)%sf(sx, sy, z - ((p + 1)/2))
+ end do
+ end if
+
+ end select
+ #:endblock
+
+#ifdef MFC_SIMULATION
+ if (qbmm .and. .not. polytropic) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", outer_loops=[("i", 1, "nb"), ("q", 1, "nnode")])
+ select case (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type)
+ case (-13:-3); ! Ghost-cell extrap.
+ pb(x, y, z, q, i) = pb(ex, ey, ez, q, i)
+ mv(x, y, z, q, i) = mv(ex, ey, ez, q, i)
+ case (-2); ! Symmetry
+ pb(x, y, z, q, i) = pb(sx, sy, sz, q, i)
+ mv(x, y, z, q, i) = mv(sx, sy, sz, q, i)
+ case (-1); ! Periodic
+ pb(x, y, z, q, i) = pb(px, py, pz, q, i)
+ mv(x, y, z, q, i) = mv(px, py, pz, q, i)
+ case (-14); ! Axis
+ pb(x, y, z, q, i) = pb(sx, sy, z - ((p + 1)/2), q, i)
+ mv(x, y, z, q, i) = mv(sx, sy, z - ((p + 1)/2), q, i)
+ end select
+ #:endblock
+ end if
+#endif
+ end if
+
+ call s_mpi_sendrecv_variables_buffers(q_prim_vf, bc_id_sfs, &
+#ifdef MFC_SIMULATION
+ pb, mv, &
+#endif
+ iter_dir, iter_loc, &
+ bc_id_has_bc)
+
+ end do
+ end do
+
+ end subroutine s_populate_prim_buffers
+
+ !> The purpose of this procedure is to populate the buffers
+ !! of the conservative variables, depending on the selected
+ !! boundary conditions.
+ subroutine s_populate_cons_buffers(q_cons_vf &
+#ifdef MFC_SIMULATION
+ , pb, mv &
+#endif
+ )
+
+ type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
+#ifdef MFC_SIMULATION
+ real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
+#endif
+
+ call s_populate_prim_buffers(q_cons_vf &
+#ifdef MFC_SIMULATION
+ , pb, mv &
+#endif
+ )
+
+ end subroutine s_populate_cons_buffers
+#endif
+
+end module m_boundary_conditions_common
diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp
index 3623fabba8..0d232ffc44 100644
--- a/src/common/m_checker_common.fpp
+++ b/src/common/m_checker_common.fpp
@@ -222,7 +222,7 @@ contains
#:endif
if (.not. skip_check) then
- @:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. (bc_${X}$%${BOUND}$ > -1 .or. bc_${X}$%${BOUND}$ < -16), &
+ @:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. (bc_${X}$%${BOUND}$ > -1 .or. bc_${X}$%${BOUND}$ < -17), &
"bc_${X}$%${BOUND}$ must be between -1 and -16")
@:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. bc_${X}$%${BOUND}$ == -14, &
@@ -320,35 +320,23 @@ contains
!> Checks constraints on the inputs for moving boundaries.
!! Called by s_check_inputs_common for all three stages
subroutine s_check_inputs_moving_bc
- #:for X, VB2, VB3 in [('x', 'vb2', 'vb3'), ('y', 'vb3', 'vb1'), ('z', 'vb1', 'vb2')]
- if (any((/bc_${X}$%vb1, bc_${X}$%vb2, bc_${X}$%vb3/) /= 0d0)) then
- if (bc_${X}$%beg == -15) then
- if (any((/bc_${X}$%${VB2}$, bc_${X}$%${VB3}$/) /= 0d0)) then
- call s_mpi_abort("bc_${X}$%beg must be -15 if "// &
- "bc_${X}$%${VB2}$ or bc_${X}$%${VB3}$ "// &
- "is set. Exiting ...")
- end if
- elseif (bc_${X}$%beg /= -16) then
- call s_mpi_abort("bc_${X}$%beg must be -15 or -16 if "// &
- "bc_${X}$%vb[1,2,3] is set. Exiting ...")
- end if
- end if
- #:endfor
- #:for X, VE2, VE3 in [('x', 've2', 've3'), ('y', 've3', 've1'), ('z', 've1', 've2')]
- if (any((/bc_${X}$%ve1, bc_${X}$%ve2, bc_${X}$%ve3/) /= 0d0)) then
- if (bc_${X}$%end == -15) then
- if (any((/bc_${X}$%${VE2}$, bc_${X}$%${VE3}$/) /= 0d0)) then
- call s_mpi_abort("bc_${X}$%end must be -15 if "// &
- "bc_${X}$%${VE2}$ or bc_${X}$%${VE3}$ "// &
- "is set. Exiting ...")
+ #:for DIR in ['x', 'y', 'z']
+ #:for LOC in ['beg', 'end']
+ if (any(bc_${DIR}$%vel_${LOC}$ /= 0d0)) then
+ if (bc_${DIR}$%${LOC}$ == -15) then
+ if (any((/bc_${DIR}$%vel_${LOC}$ (2), bc_${DIR}$%vel_${LOC}$ (3)/) /= 0d0)) then
+ call s_mpi_abort("bc_${DIR}$%${LOC}$ must be -15 if "// &
+ "bc_${DIR}$%vel_${LOC}$[2,3] is set. Exiting ...")
+ end if
+ elseif (bc_${DIR}$%${LOC}$ /= -16) then
+ call s_mpi_abort("bc_${DIR}$%${LOC}$ must be -15 or -16 if "// &
+ "bc_${DIR}$%vel_${LOC}$ is set. Exiting ...")
end if
- elseif (bc_${X}$%end /= -16) then
- call s_mpi_abort("bc_${X}$%end must be -15 or -16 if "// &
- "bc_${X}$%ve[1,2,3] is set. Exiting ...")
end if
- end if
+ #:endfor
#:endfor
+
end subroutine s_check_inputs_moving_bc
end module m_checker_common
diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp
index 8bc120bef7..a1435b4bfa 100644
--- a/src/common/m_constants.fpp
+++ b/src/common/m_constants.fpp
@@ -20,8 +20,12 @@ module m_constants
integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation
integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation
integer, parameter :: num_patches_max = 10
+ integer, parameter :: num_bc_patches_max = 20
integer, parameter :: pathlen_max = 400
integer, parameter :: nnode = 4 !< Number of QBMM nodes
+ integer, parameter :: gp_layers = 3 !< Number of ghost point layers
+ integer, parameter :: num_bcs_max = 100 !< Maximum number of boundary condition types.
+
real(kind(0d0)), parameter :: capillary_cutoff = 1e-6 !< color function gradient magnitude at which to apply the surface tension fluxes
real(kind(0d0)), parameter :: acoustic_spatial_support_width = 2.5d0 !< Spatial support width of acoustic source, used in s_source_spatial
real(kind(0d0)), parameter :: dflt_vcfl_dt = 100d0 !< value of vcfl_dt when viscosity is off for computing adaptive timestep size
diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp
index b94ec89b1b..b43a72824b 100644
--- a/src/common/m_derived_types.fpp
+++ b/src/common/m_derived_types.fpp
@@ -73,12 +73,8 @@ module m_derived_types
type int_bounds_info
integer :: beg
integer :: end
- real(kind(0d0)) :: vb1
- real(kind(0d0)) :: vb2
- real(kind(0d0)) :: vb3
- real(kind(0d0)) :: ve1
- real(kind(0d0)) :: ve2
- real(kind(0d0)) :: ve3
+ real(kind(0d0)) :: vel_beg(1:3)
+ real(kind(0d0)) :: vel_end(1:3)
real(kind(0d0)) :: pres_in, pres_out
real(kind(0d0)), dimension(3) :: vel_in, vel_out
real(kind(0d0)), dimension(num_fluids_max) :: alpha_rho_in, alpha_in
@@ -320,7 +316,7 @@ module m_derived_types
!> Ghost Point for Immersed Boundaries
type ghost_point
- real(kind(0d0)), dimension(3) :: loc !< Physical location of the ghost point
+        integer, dimension(3) :: loc !< Grid indices of the ghost point
real(kind(0d0)), dimension(3) :: ip_loc !< Physical location of the image point
integer, dimension(3) :: ip_grid !< Top left grid point of IP
real(kind(0d0)), dimension(2, 2, 2) :: interp_coeffs !< Interpolation Coefficients of image point
@@ -348,4 +344,27 @@ module m_derived_types
integer :: gamma_method
end type chemistry_parameters
+ type bc_patch_parameters
+ ! User inputs
+ integer :: type
+ integer :: dir ! [x,y,z] => [1,2,3]
+ integer :: loc ! [beg,end] => [-1,+1]
+ integer :: geometry
+
+ real(kind(0d0)) :: centroid(1:3)
+ real(kind(0d0)) :: length(1:3)
+ real(kind(0d0)) :: radius
+
+ real(kind(0d0)) :: vel(1:3)
+ end type bc_patch_parameters
+
+ type t_bc_id
+ integer :: type
+ real(kind(0d0)) :: vel(1:3)
+ end type t_bc_id
+
+ type t_bc_id_sf
+ type(t_bc_id), dimension(:, :, :), allocatable :: sf
+ end type t_bc_id_sf
+
end module m_derived_types
diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp
new file mode 100644
index 0000000000..9164935d36
--- /dev/null
+++ b/src/common/m_global_parameters_common.fpp
@@ -0,0 +1,27 @@
+module m_global_parameters_common
+
+ use m_constants
+ use m_derived_types
+
+ implicit none
+
+contains
+
+ subroutine s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc)
+
+ integer, intent(inout) :: num_bc_patches
+ type(bc_patch_parameters), intent(inout) :: patch_bc(num_bc_patches_max)
+
+ integer :: i
+
+ num_bc_patches = 0
+ do i = 1, num_bc_patches_max
+ patch_bc(i)%type = dflt_int
+ patch_bc(i)%geometry = dflt_int
+ patch_bc(i)%dir = dflt_int
+ patch_bc(i)%loc = dflt_int
+ end do
+
+ end subroutine s_bc_assign_default_values_to_user_inputs
+
+end module m_global_parameters_common
diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp
index dce2711209..83023ec4cb 100644
--- a/src/common/m_helper.fpp
+++ b/src/common/m_helper.fpp
@@ -11,8 +11,6 @@ module m_helper
use m_global_parameters !< Definitions of the global parameters
- use m_mpi_common !< MPI modules
-
use ieee_arithmetic !< For checking NaN
! ==========================================================================
@@ -36,7 +34,7 @@ module m_helper
s_print_2D_array, &
f_xor, &
f_logical_to_int, &
- s_prohibit_abort
+ f_clamp
contains
@@ -69,9 +67,6 @@ contains
nR3 = dot_product(weights, nRtmp**3.d0)
ntmp = DSQRT((4.d0*pi/3.d0)*nR3/vftmp)
- !ntmp = (3.d0/(4.d0*pi))*0.00001
-
- !print *, "nbub", ntmp
end subroutine s_comp_n_from_cons
@@ -466,20 +461,15 @@ contains
end if
end function f_logical_to_int
- subroutine s_prohibit_abort(condition, message)
- character(len=*), intent(in) :: condition, message
+ function f_clamp(x, xmin, xmax) result(y)
- print *, ""
- print *, "===================================================================================================="
- print *, " CASE FILE ERROR "
- print *, "----------------------------------------------------------------------------------------------------"
- print *, "Prohibited condition: ", trim(condition)
- if (len_trim(message) > 0) then
- print *, "Note: ", trim(message)
- end if
- print *, "===================================================================================================="
- print *, ""
- call s_mpi_abort
- end subroutine s_prohibit_abort
+ !$acc routine seq
+
+ integer, intent(in) :: x, xmin, xmax
+ integer :: y
+
+ y = max(xmin, min(x, xmax))
+
+ end function f_clamp
end module m_helper
diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp
index 403ac4f133..fc523922ae 100644
--- a/src/common/m_mpi_common.fpp
+++ b/src/common/m_mpi_common.fpp
@@ -13,9 +13,14 @@ module m_mpi_common
use mpi !< Message passing interface (MPI) module
#endif
+ use m_nvtx
+
+ use m_helper
+
use m_derived_types !< Definitions of the derived types
use m_global_parameters !< Definitions of the global parameters
+
! ==========================================================================
implicit none
@@ -25,6 +30,27 @@ module m_mpi_common
integer, private :: ierr
!> @}
+#ifndef MFC_PRE_PROCESS
+ real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_send !<
+ !! This variable is utilized to pack and send the buffer of the cell-average
+ !! conservative variables, for a single computational domain boundary at the
+ !! time, to the relevant neighboring processor.
+
+ real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_recv !<
+ !! q_cons_buff_recv is utilized to receive and unpack the buffer of the cell-
+ !! average conservative variables, for a single computational domain boundary
+ !! at the time, from the relevant neighboring processor.
+
+ !$acc declare create(q_cons_buff_send, q_cons_buff_recv)
+
+ integer :: v_size
+
+ !$acc declare create(v_size)
+#endif
+
+ integer, dimension(3, -1:1) :: neighbor_procs
+ !$acc declare create(neighbor_procs)
+
contains
!> The subroutine initializes the MPI execution environment
@@ -60,6 +86,72 @@ contains
end subroutine s_mpi_initialize
+ subroutine s_initialize_mpi_common_module()
+
+#if defined(MFC_MPI) && !defined(MFC_PRE_PROCESS)
+
+ ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that
+ ! for the sake of simplicity, both variables are provided sufficient
+ ! storage to hold the largest buffer in the computational domain.
+
+#ifdef MFC_SIMULATION
+ if (qbmm .and. .not. polytropic) then
+ if (n > 0) then
+ if (p > 0) then
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
+ & (m + 2*buff_size + 1)* &
+ & (n + 2*buff_size + 1)* &
+ & (p + 2*buff_size + 1)/ &
+ & (min(m, n, p) + 2*buff_size + 1)))
+ else
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
+ & (max(m, n) + 2*buff_size + 1)))
+ end if
+ else
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)))
+ end if
+
+ @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
+
+ v_size = sys_size + 2*nb*4
+ else
+#endif
+ if (n > 0) then
+ if (p > 0) then
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+ & (m + 2*buff_size + 1)* &
+ & (n + 2*buff_size + 1)* &
+ & (p + 2*buff_size + 1)/ &
+ & (min(m, n, p) + 2*buff_size + 1)))
+ else
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+ & (max(m, n) + 2*buff_size + 1)))
+ end if
+ else
+ @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size))
+ end if
+
+ @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
+
+ v_size = sys_size
+#ifdef MFC_SIMULATION
+ end if
+#endif
+
+!$acc update device(v_size)
+
+#endif
+
+ end subroutine s_initialize_mpi_common_module
+
+ subroutine s_finalize_mpi_common_module()
+
+#if defined(MFC_MPI) && !defined(MFC_PRE_PROCESS)
+ @:DEALLOCATE(q_cons_buff_send, q_cons_buff_recv)
+#endif
+
+ end subroutine s_finalize_mpi_common_module
+
!! @param q_cons_vf Conservative variables
!! @param ib_markers track if a cell is within the immersed boundary
!! @param levelset closest distance from every cell to the IB
@@ -475,4 +567,699 @@ contains
end subroutine s_mpi_finalize
+#ifndef MFC_PRE_PROCESS
+ !> The goal of this procedure is to populate the buffers of
+ !! the cell-average conservative variables by communicating
+ !! with the neighboring processors.
+ !! @param q_cons_vf Cell-average conservative variables
+ !! @param mpi_dir MPI communication coordinate direction
+ !! @param pbc_loc Processor boundary condition (PBC) location
+ subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, &
+ bc_id_sfs, &
+#ifdef MFC_SIMULATION
+ pb, mv, &
+#endif
+ mpi_dir, &
+ pbc_loc, &
+ bc_id_has_bc)
+
+ type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
+ type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs
+
+#ifdef MFC_SIMULATION
+ real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
+#endif
+ integer, intent(in) :: mpi_dir, pbc_loc
+
+ logical, dimension(1:3, -1:1, -num_bcs_max:0), intent(in) :: bc_id_has_bc
+
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+
+ type(bc_patch_parameters) :: bc
+
+ integer :: buffer_counts(1:3)
+
+ real(kind(0d0)), pointer :: p_send, p_recv
+
+ integer :: r
+
+#ifdef MFC_MPI
+
+!$acc update device(v_size)
+
+#ifdef MFC_SIMULATION
+ if (qbmm .and. .not. polytropic) then
+ buffer_counts = (/ &
+ buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), &
+ buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), &
+ buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
+ /)
+ else
+#endif
+ buffer_counts = (/ &
+ buff_size*sys_size*(n + 1)*(p + 1), &
+ buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), &
+ buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
+ /)
+#ifdef MFC_SIMULATION
+ end if
+#endif
+
+ if (bc_id_has_bc(mpi_dir, -pbc_loc, 0)) then
+ call nvtxStartRange("RHS-COMM-PACKBUF")
+
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", 1, "sys_size")])
+ q_cons_buff_send(pack_idr) = q_cons_vf(i)%sf(sx, sy, sz)
+ #:endblock
+
+#ifdef MFC_SIMULATION
+ if (qbmm .and. .not. polytropic) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", "sys_size + 1", "sys_size + 4"), ("q", 1, "nb")])
+ q_cons_buff_send(pack_idr + (q - 1)*4) = pb(sx, sy, sz, i - sys_size, q)
+ q_cons_buff_send(pack_idr + (q - 1)*4 + nb*4) = mv(sx, sy, sz, i - sys_size, q)
+ #:endblock
+ end if
+#endif
+
+ call nvtxEndRange ! Packbuf
+ end if
+
+ p_send => q_cons_buff_send(0)
+ p_recv => q_cons_buff_recv(0)
+
+ #:for rdma_mpi in [False, True]
+ if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
+ #:if rdma_mpi
+ !$acc data attach(p_send, p_recv)
+ !$acc host_data use_device(p_send, p_recv)
+
+ call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
+ #:else
+ call nvtxStartRange("RHS-COMM-DEV2HOST")
+ !$acc update host(q_cons_buff_send)
+ call nvtxEndRange
+
+ call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA")
+ #:endif
+
+ call MPI_SENDRECV( &
+ p_send, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, -pbc_loc), 0, &
+ p_recv, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, +pbc_loc), 0, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
+ call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
+
+ #:if rdma_mpi
+ !$acc end host_data
+ !$acc end data
+ !$acc wait
+ #:else
+ call nvtxStartRange("RHS-COMM-HOST2DEV")
+ !$acc update device(q_cons_buff_recv)
+ call nvtxEndRange
+ #:endif
+ end if
+ #:endfor
+
+ if (bc_id_has_bc(mpi_dir, pbc_loc, 0)) then
+ call nvtxStartRange("RHS-COMM-UNPACKBUF")
+
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", 1, "sys_size")])
+ if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then
+ q_cons_vf(i)%sf(x, y, z) = q_cons_buff_recv(pack_idr)
+ end if
+ #:endblock
+
+#ifdef MFC_SIMULATION
+ if (qbmm .and. .not. polytropic) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", "sys_size + 1", "sys_size + 4"), ("q", 1, "nb")])
+ if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then
+ pb(x, y, z, i - sys_size, q) = q_cons_buff_recv(pack_idr + (q - 1)*4)
+ mv(x, y, z, i - sys_size, q) = q_cons_buff_recv(pack_idr + (q - 1)*4 + nb*4)
+ end if
+ #:endblock
+ end if
+#endif
+
+ call nvtxEndRange
+ end if
+#endif
+
+ end subroutine s_mpi_sendrecv_variables_buffers
+
+ !> The goal of this procedure is to populate the buffers of
+ !! the grid variables by communicating with the neighboring
+ !! processors. Note that only the buffers of the cell-width
+ !! distributions are handled in such a way. This is because
+ !! the buffers of cell-boundary locations may be calculated
+ !! directly from those of the cell-width distributions.
+ subroutine s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs)
+
+ type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs
+
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+
+ type(t_bc_id) :: bc
+
+ integer :: iter_loc
+
+ integer :: send_offset, recv_offset
+ integer :: extra_cell_count
+
+ #:for cmp, dir_id, extent in zip(['x', 'y', 'z'], [1, 2, 3], ['m', 'n', 'p'])
+
+ if (${dir_id}$ > num_dims) then
+ return
+ end if
+
+ do iter_loc = -1, 1, 2
+
+#ifdef MFC_MPI
+ if (iter_loc == -1) then
+ send_offset = ${extent}$-buff_size + 1
+ recv_offset = -buff_size
+ else
+ send_offset = 0
+ recv_offset = ${extent}$+1
+ end if
+
+ call MPI_SENDRECV( &
+ d${cmp}$ (send_offset), buff_size, &
+ MPI_DOUBLE_PRECISION, neighbor_procs(${dir_id}$, -iter_loc), 0, &
+ d${cmp}$ (recv_offset), buff_size, &
+ MPI_DOUBLE_PRECISION, neighbor_procs(${dir_id}$, +iter_loc), 0, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+#endif
+
+ ! Note: This does a little TOO much work (iterating over too many cells)
+ ! but this is because dx, dy, dz SHOULD be two dimensional arrays,
+ ! not one-dimensional arrays. Indeed, the boundary conditions
+ ! can vary along two axes, not just one if the case is 3D.
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir_id, loc="iter_loc", gpu=False)
+ bc = bc_id_sfs(${dir_id}$, iter_loc)%sf(exlhs, eylhs, ezlhs)
+
+ if (bc%type <= -3) then
+ d${cmp}$ (${cmp}$) = d${cmp}$ (e${cmp}$)
+ elseif (bc%type == -2) then
+ d${cmp}$ (${cmp}$) = d${cmp}$ (s${cmp}$)
+ elseif (bc%type == -1) then
+ d${cmp}$ (${cmp}$) = d${cmp}$ (p${cmp}$)
+ end if
+ #:endblock
+
+ end do
+
+#ifdef MFC_POST_PROCESS
+ extra_cell_count = offset_${cmp}$%beg
+#else
+ extra_cell_count = buff_size
+#endif
+
+ do i = 1, extra_cell_count
+ ! Computing the cell-boundary locations buffer, at the beginning of
+ ! the coordinate direction, from the cell-width distribution buffer.
+ ${cmp}$_cb(-1 - i) = ${cmp}$_cb(-i) - d${cmp}$ (-i)
+ end do
+
+#ifdef MFC_POST_PROCESS
+ extra_cell_count = offset_${cmp}$%end
+#else
+ extra_cell_count = buff_size
+#endif
+
+ do i = 1, extra_cell_count
+ ! Populating the cell-boundary locations buffer, at the end of the
+ ! coordinate direction, from buffer of the cell-width distribution.
+ ${cmp}$_cb(${extent}$+i) = ${cmp}$_cb(${extent}$+(i - 1)) + d${cmp}$ (${extent}$+i)
+ end do
+
+ do i = 1, buff_size
+ ! Computing the cell-center locations buffer, at the beginning of
+ ! the coordinate direction, from the cell-width distribution buffer.
+ ${cmp}$_cc(-i) = ${cmp}$_cc(1 - i) - (d${cmp}$ (1 - i) + d${cmp}$ (-i))/2d0
+
+ ! Populating the cell-center locations buffer, at the end of the
+ ! coordinate direction, from buffer of the cell-width distribution.
+ ${cmp}$_cc(${extent}$+i) = ${cmp}$_cc(${extent}$+(i - 1)) + (d${cmp}$ (${extent}$+(i - 1)) + d${cmp}$ (${extent}$+i))/2d0
+ end do
+ #:endfor
+
+ end subroutine s_mpi_sendrecv_grid_spacing_buffers
+#endif
+
+ !> The purpose of this procedure is to optimally decompose
+ !! the computational domain among the available processors.
+ !! This is performed by attempting to award each processor,
+ !! in each of the coordinate directions, approximately the
+ !! same number of cells, and then recomputing the affected
+ !! global parameters.
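+    !! For example (illustrative): with m + 1 = 103 cells in x and
+    !! proc_nums(1) = 4 ranks, each rank is first awarded 25 cells and the
+    !! 3 remaining cells go to ranks 0 through 2, so ranks 0-2 end up with
+    !! 26 cells each and rank 3 with 25.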
+ subroutine s_mpi_decompose_computational_domain
+
+#ifdef MFC_MPI
+
+ real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !<
+ !! Non-optimal number of processors in the x-, y- and z-directions
+
+ real(kind(0d0)) :: fct_min !<
+ !! Processor factorization (fct) minimization parameter
+
+ integer :: MPI_COMM_CART !<
+ !! Cartesian processor topology communicator
+
+ integer, dimension(1:3) :: rem_cells !<
+ !! Remaining number of cells, in a particular coordinate direction,
+ !! after the majority is divided up among the available processors
+
+ integer :: i, j !< Generic loop iterators
+
+ integer :: iter_dir
+
+#endif
+
+ proc_coords(:) = 0
+ proc_nums(:) = 1
+
+#ifdef MFC_MPI
+
+ if (num_procs == 1 .and. parallel_io) then
+ do i = 1, num_dims
+ start_idx(i) = 0
+ end do
+ return
+ end if
+
+ ! 3D Cartesian Processor Topology ==================================
+ if (n > 0) then
+
+ if (p > 0) then
+
+ if (cyl_coord .and. p > 0) then
+ ! Implement pencil processor blocking if using cylindrical coordinates so
+ ! that all cells in azimuthal direction are stored on a single processor.
+ ! This is necessary for efficient application of Fourier filter near axis.
+
+ ! Initial values of the processor factorization optimization
+ proc_nums(1) = 1
+ proc_nums(2) = num_procs
+ proc_nums(3) = 1
+ ierr = -1
+
+ ! Computing minimization variable for these initial values
+ tmp_num_procs_x = proc_nums(1)
+ tmp_num_procs_y = proc_nums(2)
+ tmp_num_procs_z = proc_nums(3)
+ fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y)
+
+ ! Searching for optimal computational domain distribution
+ do i = 1, num_procs
+
+ if (mod(num_procs, i) == 0 &
+ .and. &
+ (m + 1)/i >= num_stcls_min*weno_order) then
+
+ tmp_num_procs_x = i
+ tmp_num_procs_y = num_procs/i
+
+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y) &
+ .and. &
+ (n + 1)/tmp_num_procs_y &
+ >= &
+ num_stcls_min*weno_order) then
+
+ proc_nums(1) = i
+ proc_nums(2) = num_procs/i
+ fct_min = abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y)
+ ierr = 0
+
+ end if
+
+ end if
+
+ end do
+
+ else
+
+ ! Initial estimate of optimal processor topology
+ proc_nums(1) = 1
+ proc_nums(2) = 1
+ proc_nums(3) = num_procs
+ ierr = -1
+
+ ! Benchmarking the quality of this initial guess
+ tmp_num_procs_x = proc_nums(1)
+ tmp_num_procs_y = proc_nums(2)
+ tmp_num_procs_z = proc_nums(3)
+ fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y) &
+ + 10d0*abs((n + 1)/tmp_num_procs_y &
+ - (p + 1)/tmp_num_procs_z)
+
+ ! Optimization of the initial processor topology
+ do i = 1, num_procs
+
+ if (mod(num_procs, i) == 0 &
+ .and. &
+ (m + 1)/i >= num_stcls_min*weno_order) then
+
+ do j = 1, num_procs/i
+
+ if (mod(num_procs/i, j) == 0 &
+ .and. &
+ (n + 1)/j >= num_stcls_min*weno_order) then
+
+ tmp_num_procs_x = i
+ tmp_num_procs_y = j
+ tmp_num_procs_z = num_procs/(i*j)
+
+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y) &
+ + abs((n + 1)/tmp_num_procs_y &
+ - (p + 1)/tmp_num_procs_z) &
+ .and. &
+ (p + 1)/tmp_num_procs_z &
+ >= &
+ num_stcls_min*weno_order) &
+ then
+
+ proc_nums(1) = i
+ proc_nums(2) = j
+ proc_nums(3) = num_procs/(i*j)
+ fct_min = abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y) &
+ + abs((n + 1)/tmp_num_procs_y &
+ - (p + 1)/tmp_num_procs_z)
+ ierr = 0
+
+ end if
+
+ end if
+
+ end do
+
+ end if
+
+ end do
+
+ end if
+
+ ! Verifying that a valid decomposition of the computational
+ ! domain has been established. If not, the simulation exits.
+ if (proc_rank == 0 .and. ierr == -1) then
+ call s_mpi_abort('Unsupported combination of values '// &
+ 'of num_procs, m, n, p and '// &
+ 'weno_order. Exiting ...')
+ end if
+
+ ! Creating new communicator using the Cartesian topology
+ call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/proc_nums(1), &
+ proc_nums(2), proc_nums(3)/), &
+ (/.true., .true., .true./), &
+ .false., MPI_COMM_CART, ierr)
+
+ ! Finding the Cartesian coordinates of the local process
+ call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, &
+ proc_coords, ierr)
+ ! END: 3D Cartesian Processor Topology =============================
+
+ ! Global Parameters for z-direction ================================
+
+ ! Number of remaining cells
+ rem_cells(3) = mod(p + 1, proc_nums(3))
+
+#ifdef MFC_PRE_PROCESS
+ ! Preliminary uniform cell-width spacing
+ if (old_grid .neqv. .true.) then
+ dz = (z_domain%end - z_domain%beg)/real(p + 1, kind(0d0))
+ end if
+#endif
+
+ ! Optimal number of cells per processor
+ p = (p + 1)/proc_nums(3) - 1
+
+ ! Distributing the remaining cells
+ do i = 1, rem_cells(3)
+ if (proc_coords(3) == i - 1) then
+ p = p + 1; exit
+ end if
+ end do
+
+ ! ==================================================================
+
+ ! 2D Cartesian Processor Topology ==================================
+ else
+
+ ! Initial estimate of optimal processor topology
+ proc_nums(1) = 1
+ proc_nums(2) = num_procs
+ ierr = -1
+
+ ! Benchmarking the quality of this initial guess
+ tmp_num_procs_x = proc_nums(1)
+ tmp_num_procs_y = proc_nums(2)
+ fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y)
+
+ ! Optimization of the initial processor topology
+ do i = 1, num_procs
+
+ if (mod(num_procs, i) == 0 &
+ .and. &
+ (m + 1)/i >= num_stcls_min*weno_order) then
+
+ tmp_num_procs_x = i
+ tmp_num_procs_y = num_procs/i
+
+ if (fct_min >= abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y) &
+ .and. &
+ (n + 1)/tmp_num_procs_y &
+ >= &
+ num_stcls_min*weno_order) then
+
+ proc_nums(1) = i
+ proc_nums(2) = num_procs/i
+ fct_min = abs((m + 1)/tmp_num_procs_x &
+ - (n + 1)/tmp_num_procs_y)
+ ierr = 0
+
+ end if
+
+ end if
+
+ end do
+
+ ! Verifying that a valid decomposition of the computational
+ ! domain has been established. If not, the simulation exits.
+ if (proc_rank == 0 .and. ierr == -1) then
+ call s_mpi_abort('Unsupported combination of values '// &
+ 'of num_procs, m, n and '// &
+ 'weno_order. Exiting ...')
+ end if
+
+ ! Creating new communicator using the Cartesian topology
+ call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/proc_nums(1), &
+ proc_nums(2)/), (/.true., &
+ .true./), .false., MPI_COMM_CART, &
+ ierr)
+
+ ! Finding the Cartesian coordinates of the local process
+ call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, &
+ proc_coords, ierr)
+
+ end if
+ ! END: 2D Cartesian Processor Topology =============================
+
+ ! Global Parameters for y-direction ================================
+
+ ! Number of remaining cells
+ rem_cells(2) = mod(n + 1, proc_nums(2))
+
+#ifdef MFC_PRE_PROCESS
+ ! Preliminary uniform cell-width spacing
+ if (old_grid .neqv. .true.) then
+ dy = (y_domain%end - y_domain%beg)/real(n + 1, kind(0d0))
+ end if
+#endif
+
+ ! Base (even-split) number of cells per processor; the remainder is distributed below
+ n = (n + 1)/proc_nums(2) - 1
+
+ ! Distributing the remaining cells
+ do i = 1, rem_cells(2)
+ if (proc_coords(2) == i - 1) then
+ n = n + 1; exit
+ end if
+ end do
+
+ ! ==================================================================
+
+ ! 1D Cartesian Processor Topology ==================================
+ else
+
+ ! Optimal processor topology
+ proc_nums(1) = num_procs
+
+#ifdef MFC_POST_PROCESS
+ ! Number of cells in undecomposed computational domain needed
+ ! for sub-domain reassembly during formatted data output
+ m_root = m
+#endif
+
+ ! Creating new communicator using the Cartesian topology
+ call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/proc_nums(1)/), &
+ (/.true./), .false., MPI_COMM_CART, &
+ ierr)
+
+ ! Finding the Cartesian coordinates of the local process
+ call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, &
+ proc_coords, ierr)
+
+ end if
+ ! ==================================================================
+
+ ! Global Parameters for x-direction ================================
+
+ ! Number of remaining cells
+ rem_cells(1) = mod(m + 1, proc_nums(1))
+
+#ifdef MFC_PRE_PROCESS
+ ! Preliminary uniform cell-width spacing
+ if (old_grid .neqv. .true.) then
+ dx = (x_domain%end - x_domain%beg)/real(m + 1, kind(0d0))
+ end if
+#endif
+
+ ! Base (even-split) number of cells per processor; the remainder is distributed below
+ m = (m + 1)/proc_nums(1) - 1
+
+ ! Distributing the remaining cells
+ do i = 1, rem_cells(1)
+ if (proc_coords(1) == i - 1) then
+ m = m + 1; exit
+ end if
+ end do
+ ! ==================================================================
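As a concrete check of the splits above (with hypothetical sizes): for 103 global cells in x (m = 102) on proc_nums(1) = 4 ranks, rem_cells(1) = mod(103, 4) = 3 and the even split leaves m = 103/4 - 1 = 24 locally, i.e. 25 cells per rank; the loop then hands one extra cell to the ranks with proc_coords(1) = 0, 1, and 2, so three ranks carry 26 cells, the last carries 25, and 3*26 + 25 = 103 recovers the global count. The y- and z-direction blocks above apply the same split to n and p.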
+
+ #:for cmp, dir, extent in zip(['z', 'y', 'x'], [3, 2, 1], ['p', 'n', 'm'])
+
+ if (${dir}$ <= num_dims) then
+
+#ifdef MFC_PRE_PROCESS
+ ! Beginning and end sub-domain boundary locations
+ if (parallel_io .neqv. .true.) then
+ if (old_grid .neqv. .true.) then
+ if (proc_coords(${dir}$) < rem_cells(${dir}$)) then
+ ${cmp}$_domain%beg = ${cmp}$_domain%beg + d${cmp}$*real((${extent}$+1)* &
+ proc_coords(${dir}$))
+ ${cmp}$_domain%end = ${cmp}$_domain%end - d${cmp}$*real((${extent}$+1)* &
+ (proc_nums(${dir}$) - proc_coords(${dir}$) - 1) &
+ - (proc_nums(${dir}$) - rem_cells(${dir}$)))
+ else
+ ${cmp}$_domain%beg = ${cmp}$_domain%beg + d${cmp}$*real((${extent}$+1)* &
+ proc_coords(${dir}$) + rem_cells(${dir}$))
+ ${cmp}$_domain%end = ${cmp}$_domain%end - d${cmp}$*real((${extent}$+1)* &
+ (proc_nums(${dir}$) - proc_coords(${dir}$) - 1))
+ end if
+ end if
+ else
+ if (proc_coords(${dir}$) < rem_cells(${dir}$)) then
+ start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$)
+ else
+ start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + rem_cells(${dir}$)
+ end if
+ end if
+#endif
+
+#ifdef MFC_POST_PROCESS
+ ! Ghost zone at the beginning
+ if (proc_coords(${dir}$) > 0 .and. format == 1) then
+ offset_${cmp}$%beg = 2
+ else
+ offset_${cmp}$%beg = 0
+ end if
+
+ ! Ghost zone at the end
+ if (proc_coords(${dir}$) < proc_nums(${dir}$) - 1 .and. format == 1) then
+ offset_${cmp}$%end = 2
+ else
+ offset_${cmp}$%end = 0
+ end if
+#endif
+
+#ifndef MFC_PRE_PROCESS
+ if (parallel_io) then
+ if (proc_coords(${dir}$) < rem_cells(${dir}$)) then
+ start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$)
+ else
+ start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + rem_cells(${dir}$)
+ end if
+ end if
+#endif
+
+ end if
+
+ #:endfor
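The #:for block stamps out identical bookkeeping for the z-, y-, and x-directions. Among the quantities it computes, start_idx is the one parallel I/O relies on: the global index of the first cell owned by a rank. The standalone sketch below (hypothetical sizes, reusing the 103-cells-on-4-ranks example from above) evaluates that formula for every rank and shows that the ranks tile the global index range contiguously.

    program start_idx_sketch
        ! Sketch of the start_idx formula generated by the fypp loop above,
        ! for 103 global cells split over 4 ranks (hypothetical sizes).
        implicit none
        integer, parameter :: cells_global = 103, nprocs = 4
        integer :: rank, rem, base, cells_local, first

        base = cells_global/nprocs      ! minimum cells per rank (25)
        rem = mod(cells_global, nprocs) ! leftover cells handed to the first ranks (3)

        do rank = 0, nprocs - 1
            if (rank < rem) then
                cells_local = base + 1
                first = cells_local*rank
            else
                cells_local = base
                first = cells_local*rank + rem
            end if
            print '(3(A,I0))', 'rank ', rank, ': start_idx ', first, &
                ', local cells ', cells_local
        end do
    end program start_idx_sketch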
+
+ do iter_dir = 1, num_dims
+ proc_coords(iter_dir) = proc_coords(iter_dir) - 1
+ call MPI_CART_RANK(MPI_COMM_CART, proc_coords, neighbor_procs(iter_dir, -1), ierr)
+ proc_coords(iter_dir) = proc_coords(iter_dir) + 1
+ proc_coords(iter_dir) = proc_coords(iter_dir) + 1
+ call MPI_CART_RANK(MPI_COMM_CART, proc_coords, neighbor_procs(iter_dir, +1), ierr)
+ proc_coords(iter_dir) = proc_coords(iter_dir) - 1
+ end do
+
+ if (proc_nums(1) > 1) then
+ if (bc_x%beg == -1 .or. proc_coords(1) > 0) then
+ bc_x%beg = neighbor_procs(1, -1)
+ end if
+
+ if (bc_x%end == +1 .or. proc_coords(1) < proc_nums(1) - 1) then
+ bc_x%end = neighbor_procs(1, +1)
+ end if
+ end if
+
+ if (num_dims >= 2 .and. proc_nums(2) > 1) then
+ if (bc_y%beg == -1 .or. proc_coords(2) > 0) then
+ bc_y%beg = neighbor_procs(2, -1)
+ end if
+
+ if (bc_y%end == +1 .or. proc_coords(2) < proc_nums(2) - 1) then
+ bc_y%end = neighbor_procs(2, +1)
+ end if
+ end if
+
+ if (num_dims >= 3 .and. proc_nums(3) > 1) then
+ if (bc_z%beg == -1 .or. proc_coords(3) > 0) then
+ bc_z%beg = neighbor_procs(3, -1)
+ end if
+
+ if (bc_z%end == +1 .or. proc_coords(3) < proc_nums(3) - 1) then
+ bc_z%end = neighbor_procs(3, +1)
+ end if
+ end if
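Because the Cartesian communicator is created with all periods set to .true., the coordinate shifts in the neighbor loop above always resolve through MPI_CART_RANK and wrap around at the domain ends; the resulting neighbor ranks then overwrite the bc codes, so faces flagged as periodic and all interior faces end up holding the neighboring rank rather than a physical boundary-condition code. A minimal sketch of that wraparound behavior, assuming an MPI library that provides the mpi module, is:

    program cart_neighbor_sketch
        ! Periodic neighbor lookup on a 1D Cartesian topology: with
        ! periods = .true., MPI_CART_RANK wraps out-of-range coordinates,
        ! so rank 0 sees rank nprocs-1 as its left neighbor.
        use mpi
        implicit none
        integer :: ierr, nprocs, rank, comm_cart, left, right
        integer :: dims(1), coords(1)
        logical :: periods(1)

        call MPI_INIT(ierr)
        call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)

        dims(1) = nprocs; periods(1) = .true.
        call MPI_CART_CREATE(MPI_COMM_WORLD, 1, dims, periods, .false., comm_cart, ierr)
        call MPI_COMM_RANK(comm_cart, rank, ierr)
        call MPI_CART_COORDS(comm_cart, rank, 1, coords, ierr)

        coords(1) = coords(1) - 1
        call MPI_CART_RANK(comm_cart, coords, left, ierr)  ! wraps to nprocs-1 on rank 0
        coords(1) = coords(1) + 2
        call MPI_CART_RANK(comm_cart, coords, right, ierr) ! wraps to 0 on rank nprocs-1
        coords(1) = coords(1) - 1

        print '(3(A,I0))', 'rank ', rank, ': left ', left, ', right ', right
        call MPI_FINALIZE(ierr)
    end program cart_neighbor_sketch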
+
+#endif
+
+ end subroutine s_mpi_decompose_computational_domain
+
+ subroutine s_prohibit_abort(condition, message)
+ character(len=*), intent(in) :: condition, message
+
+ print *, ""
+ print *, "===================================================================================================="
+ print *, " CASE FILE ERROR "
+ print *, "----------------------------------------------------------------------------------------------------"
+ print *, "Prohibited condition: ", trim(condition)
+ if (len_trim(message) > 0) then
+ print *, "Note: ", trim(message)
+ end if
+ print *, "===================================================================================================="
+ print *, ""
+ call s_mpi_abort
+ end subroutine s_prohibit_abort
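s_prohibit_abort gives the case-file checkers a single formatted error path: print the violated condition, print an optional note, and abort all ranks. A hypothetical call site is sketched below; the condition and note strings are illustrative and not taken from MFC's checkers.

    ! Hypothetical validation call; the strings are illustrative only.
    if (weno_order /= 1 .and. weno_order /= 3 .and. weno_order /= 5) then
        call s_prohibit_abort('weno_order must be 1, 3, or 5', &
                              'check weno_order in the case file')
    end if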
+
end module m_mpi_common
diff --git a/src/simulation/m_nvtx.f90 b/src/common/m_nvtx.f90
similarity index 100%
rename from src/simulation/m_nvtx.f90
rename to src/common/m_nvtx.f90
diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp
index ffc6e02ba6..f6fd7e5542 100644
--- a/src/common/m_variables_conversion.fpp
+++ b/src/common/m_variables_conversion.fpp
@@ -23,7 +23,7 @@ module m_variables_conversion
use m_helper
use m_thermochem, only: &
- num_species, get_temperature, get_pressure, &
+ num_species, get_temperature, get_pressure, gas_constant, &
get_mixture_molecular_weight, get_mixture_energy_mass
! ==========================================================================
@@ -1114,7 +1114,7 @@ contains
end do
call get_mixture_molecular_weight(Ys, mix_mol_weight)
- T = q_prim_vf(T_idx)%sf(j, k, l)
+ T = q_prim_vf(E_idx)%sf(j, k, l)*mix_mol_weight/(gas_constant*rho)
call get_mixture_energy_mass(T, Ys, e_mix)
q_cons_vf(E_idx)%sf(j, k, l) = &
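The replacement line recovers the temperature from the primitive pressure stored at E_idx via the ideal-gas mixture relation T = p*mix_mol_weight/(gas_constant*rho) instead of reading a stored temperature field. As a rough sanity check with air-like numbers, assuming consistent SI units (an assumption about unit conventions, not a statement of MFC's): p = 101325 Pa, mix_mol_weight = 0.02897 kg/mol, gas_constant = 8.314 J/(mol K), and rho = 1.204 kg/m^3 give T ≈ 101325*0.02897/(8.314*1.204) ≈ 293 K, i.e. room-temperature air.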
diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.fpp
similarity index 53%
rename from src/post_process/m_data_input.f90
rename to src/post_process/m_data_input.fpp
index 567816f73b..9421865870 100644
--- a/src/post_process/m_data_input.f90
+++ b/src/post_process/m_data_input.fpp
@@ -2,6 +2,8 @@
!! @file m_data_input.f90
!> @brief Contains module m_data_input
+#:include 'macros.fpp'
+
!> @brief This module features procedures, which for a specific time-step,
!! read in the raw simulation data for the grid and the conservative
!! variables and fill out their buffer regions.
@@ -21,6 +23,8 @@ module m_data_input
use m_compile_specific
use m_helper
+
+ use m_boundary_conditions_common
! ==========================================================================
implicit none
@@ -29,8 +33,6 @@ module m_data_input
s_read_data_files, &
s_read_serial_data_files, &
s_read_parallel_data_files, &
- s_populate_grid_variables_buffer_regions, &
- s_populate_conservative_variables_buffer_regions, &
s_finalize_data_input_module
abstract interface ! ===================================================
@@ -89,6 +91,8 @@ subroutine s_read_serial_data_files(t_step)
logical :: file_check !<
!! Generic logical used to test the existence of a particular file
+ integer :: iter_dir, iter_loc
+
integer :: i !< Generic loop iterator
! Setting location of time-step folder based on current time-step
@@ -226,6 +230,10 @@ subroutine s_read_serial_data_files(t_step)
end do
+ if (t_step == 0) then
+ call s_read_boundary_condition_files(t_step_dir)
+ end if
+
if (ib) then
write (file_loc_ib, '(A,I0,A)') &
trim(t_step_ib_dir)//'/ib.dat'
@@ -235,7 +243,8 @@ subroutine s_read_serial_data_files(t_step)
FORM='unformatted', &
ACTION='read', &
STATUS='old')
- call s_mpi_abort(trim(file_loc)//' is missing. Exiting ...')
+ else
+ call s_mpi_abort(trim(file_loc_ib)//' is missing. Exiting ...')
end if
end if
@@ -340,6 +349,10 @@ subroutine s_read_parallel_data_files(t_step)
end if
end if
+ if (t_step == 0) then
+ call s_read_boundary_condition_files(trim(case_dir)//'/restart_data')
+ end if
+
if (file_per_process) then
call s_int_to_str(t_step, t_step_string)
! Open the file to read conservative variables
@@ -506,635 +519,6 @@ subroutine s_read_parallel_data_files(t_step)
end subroutine s_read_parallel_data_files
- !> The following subroutine populates the buffer regions of
- !! the cell-width spacings, the cell-boundary locations and
- !! the cell-center locations. Note that the buffer regions
- !! of the last two variables should be interpreted slightly
- !! differently than usual. They are really ghost zones that
- !! are used in aiding the multidimensional visualization of
- !! Silo database files, in VisIt, when processor boundary
- !! conditions are present.
- subroutine s_populate_grid_variables_buffer_regions
-
- integer :: i !< Generic loop iterator
-
- ! Populating Buffer Regions in the x-direction =====================
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_x%beg <= -3) then
-
- do i = 1, buff_size
- dx(-i) = dx(0)
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_x%beg == -2) then
-
- do i = 1, buff_size
- dx(-i) = dx(i - 1)
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_x%beg == -1) then
-
- do i = 1, buff_size
- dx(-i) = dx((m + 1) - i)
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x')
-
- end if
-
- do i = 1, offset_x%beg
- x_cb(-1 - i) = x_cb(-i) - dx(-i)
- end do
-
- do i = 1, buff_size
- x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2d0
- end do
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_x%end <= -3) then
-
- do i = 1, buff_size
- dx(m + i) = dx(m)
- end do
-
- ! Symmetry BC at the end
- elseif (bc_x%end == -2) then
-
- do i = 1, buff_size
- dx(m + i) = dx((m + 1) - i)
- end do
-
- ! Periodic BC at the end
- elseif (bc_x%end == -1) then
-
- do i = 1, buff_size
- dx(m + i) = dx(i - 1)
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x')
-
- end if
-
- do i = 1, offset_x%end
- x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)
- end do
-
- do i = 1, buff_size
- x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2d0
- end do
-
- ! END: Populating Buffer Regions in the x-direction ================
-
- ! Populating Buffer Regions in the y-direction =====================
-
- if (n > 0) then
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_y%beg <= -3 .and. bc_y%beg /= -14) then
-
- do i = 1, buff_size
- dy(-i) = dy(0)
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_y%beg == -2 .or. bc_y%beg == -14) then
-
- do i = 1, buff_size
- dy(-i) = dy(i - 1)
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_y%beg == -1) then
-
- do i = 1, buff_size
- dy(-i) = dy((n + 1) - i)
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y')
-
- end if
-
- do i = 1, offset_y%beg
- y_cb(-1 - i) = y_cb(-i) - dy(-i)
- end do
-
- do i = 1, buff_size
- y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2d0
- end do
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_y%end <= -3) then
-
- do i = 1, buff_size
- dy(n + i) = dy(n)
- end do
-
- ! Symmetry BC at the end
- elseif (bc_y%end == -2) then
-
- do i = 1, buff_size
- dy(n + i) = dy((n + 1) - i)
- end do
-
- ! Periodic BC at the end
- elseif (bc_y%end == -1) then
-
- do i = 1, buff_size
- dy(n + i) = dy(i - 1)
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y')
-
- end if
-
- do i = 1, offset_y%end
- y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i)
- end do
-
- do i = 1, buff_size
- y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2d0
- end do
-
- ! END: Populating Buffer Regions in the y-direction ================
-
- ! Populating Buffer Regions in the z-direction =====================
-
- if (p > 0) then
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_z%beg <= -3) then
-
- do i = 1, buff_size
- dz(-i) = dz(0)
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_z%beg == -2) then
-
- do i = 1, buff_size
- dz(-i) = dz(i - 1)
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_z%beg == -1) then
-
- do i = 1, buff_size
- dz(-i) = dz((p + 1) - i)
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z')
-
- end if
-
- do i = 1, offset_z%beg
- z_cb(-1 - i) = z_cb(-i) - dz(-i)
- end do
-
- do i = 1, buff_size
- z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2d0
- end do
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_z%end <= -3) then
-
- do i = 1, buff_size
- dz(p + i) = dz(p)
- end do
-
- ! Symmetry BC at the end
- elseif (bc_z%end == -2) then
-
- do i = 1, buff_size
- dz(p + i) = dz((p + 1) - i)
- end do
-
- ! Periodic BC at the end
- elseif (bc_z%end == -1) then
-
- do i = 1, buff_size
- dz(p + i) = dz(i - 1)
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z')
-
- end if
-
- do i = 1, offset_z%end
- z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i)
- end do
-
- do i = 1, buff_size
- z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2d0
- end do
-
- end if
-
- end if
-
- ! END: Populating Buffer Regions in the z-direction ================
-
- end subroutine s_populate_grid_variables_buffer_regions
-
- !> The purpose of this procedure is to populate the buffers
- !! of the cell-average conservative variables, depending on
- !! the boundary conditions.
- subroutine s_populate_conservative_variables_buffer_regions
-
- integer :: i, j, k !< Generic loop iterators
-
- ! Populating Buffer Regions in the x-direction =====================
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_x%beg <= -3) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p)
- end do
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_x%beg == -2) then
-
- do j = 1, buff_size
-
- ! Density or partial densities
- do i = 1, cont_idx%end
- q_cons_vf(i)%sf(-j, 0:n, 0:p) = &
- q_cons_vf(i)%sf(j - 1, 0:n, 0:p)
- end do
-
- ! x-component of momentum
- q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = &
- -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p)
-
- ! Remaining momentum component(s), if any, as well as the
- ! energy and the variable(s) from advection equation(s)
- do i = mom_idx%beg + 1, sys_size
- q_cons_vf(i)%sf(-j, 0:n, 0:p) = &
- q_cons_vf(i)%sf(j - 1, 0:n, 0:p)
- end do
-
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_x%beg == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(-j, 0:n, 0:p) = &
- q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p)
- end do
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'beg', 'x')
-
- end if
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_x%end <= -3) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(m + j, 0:n, 0:p) = &
- q_cons_vf(i)%sf(m, 0:n, 0:p)
- end do
- end do
-
- ! Symmetry BC at the end
- elseif (bc_x%end == -2) then
-
- do j = 1, buff_size
-
- ! Density or partial densities
- do i = 1, cont_idx%end
- q_cons_vf(i)%sf(m + j, 0:n, 0:p) = &
- q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p)
- end do
-
- ! x-component of momentum
- q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = &
- -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p)
-
- ! Remaining momentum component(s), if any, as well as the
- ! energy and the variable(s) from advection equation(s)
- do i = mom_idx%beg + 1, sys_size
- q_cons_vf(i)%sf(m + j, 0:n, 0:p) = &
- q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p)
- end do
-
- end do
-
- ! Perodic BC at the end
- elseif (bc_x%end == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(m + j, 0:n, 0:p) = &
- q_cons_vf(i)%sf(j - 1, 0:n, 0:p)
- end do
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'end', 'x')
-
- end if
-
- ! END: Populating Buffer Regions in the x-direction ================
-
- ! Populating Buffer Regions in the y-direction =====================
-
- if (n > 0) then
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_y%beg <= -3 .and. bc_y%beg /= -14) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p)
- end do
- end do
-
- ! Axis BC at the beginning
- elseif (bc_y%beg == -14) then
-
- do j = 1, buff_size
- do k = 0, p
- if (z_cc(k) < pi) then
- do i = 1, mom_idx%beg
- q_cons_vf(i)%sf(:, -j, k) = &
- q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2))
- end do
-
- q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = &
- -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2))
-
- q_cons_vf(mom_idx%end)%sf(:, -j, k) = &
- -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2))
-
- do i = E_idx, sys_size
- q_cons_vf(i)%sf(:, -j, k) = &
- q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2))
- end do
- else
- do i = 1, mom_idx%beg
- q_cons_vf(i)%sf(:, -j, k) = &
- q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2))
- end do
-
- q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = &
- -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2))
-
- q_cons_vf(mom_idx%end)%sf(:, -j, k) = &
- -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2))
-
- do i = E_idx, sys_size
- q_cons_vf(i)%sf(:, -j, k) = &
- q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2))
- end do
- end if
- end do
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_y%beg == -2) then
-
- do j = 1, buff_size
-
- ! Density or partial densities and x-momentum component
- do i = 1, mom_idx%beg
- q_cons_vf(i)%sf(:, -j, 0:p) = &
- q_cons_vf(i)%sf(:, j - 1, 0:p)
- end do
-
- ! y-component of momentum
- q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = &
- -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p)
-
- ! Remaining z-momentum component, if any, as well as the
- ! energy and variable(s) from advection equation(s)
- do i = mom_idx%beg + 2, sys_size
- q_cons_vf(i)%sf(:, -j, 0:p) = &
- q_cons_vf(i)%sf(:, j - 1, 0:p)
- end do
-
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_y%beg == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, -j, 0:p) = &
- q_cons_vf(i)%sf(:, (n + 1) - j, 0:p)
- end do
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'beg', 'y')
-
- end if
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_y%end <= -3) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, n + j, 0:p) = &
- q_cons_vf(i)%sf(:, n, 0:p)
- end do
- end do
-
- ! Symmetry BC at the end
- elseif (bc_y%end == -2) then
-
- do j = 1, buff_size
-
- ! Density or partial densities and x-momentum component
- do i = 1, mom_idx%beg
- q_cons_vf(i)%sf(:, n + j, 0:p) = &
- q_cons_vf(i)%sf(:, (n + 1) - j, 0:p)
- end do
-
- ! y-component of momentum
- q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = &
- -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p)
-
- ! Remaining z-momentum component, if any, as well as the
- ! energy and variable(s) from advection equation(s)
- do i = mom_idx%beg + 2, sys_size
- q_cons_vf(i)%sf(:, n + j, 0:p) = &
- q_cons_vf(i)%sf(:, (n + 1) - j, 0:p)
- end do
-
- end do
-
- ! Perodic BC at the end
- elseif (bc_y%end == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, n + j, 0:p) = &
- q_cons_vf(i)%sf(:, j - 1, 0:p)
- end do
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'end', 'y')
-
- end if
-
- ! END: Populating Buffer Regions in the y-direction ================
-
- ! Populating Buffer Regions in the z-direction =====================
-
- if (p > 0) then
-
- ! Ghost-cell extrapolation BC at the beginning
- if (bc_z%beg <= -3) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0)
- end do
- end do
-
- ! Symmetry BC at the beginning
- elseif (bc_z%beg == -2) then
-
- do j = 1, buff_size
-
- ! Density or the partial densities and the momentum
- ! components in x- and y-directions
- do i = 1, mom_idx%beg + 1
- q_cons_vf(i)%sf(:, :, -j) = &
- q_cons_vf(i)%sf(:, :, j - 1)
- end do
-
- ! z-component of momentum
- q_cons_vf(mom_idx%end)%sf(:, :, -j) = &
- -q_cons_vf(mom_idx%end)%sf(:, :, j - 1)
-
- ! Energy and advection equation(s) variable(s)
- do i = E_idx, sys_size
- q_cons_vf(i)%sf(:, :, -j) = &
- q_cons_vf(i)%sf(:, :, j - 1)
- end do
-
- end do
-
- ! Periodic BC at the beginning
- elseif (bc_z%beg == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, :, -j) = &
- q_cons_vf(i)%sf(:, :, (p + 1) - j)
- end do
- end do
-
- ! Processor BC at the beginning
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'beg', 'z')
-
- end if
-
- ! Ghost-cell extrapolation BC at the end
- if (bc_z%end <= -3) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, :, p + j) = &
- q_cons_vf(i)%sf(:, :, p)
- end do
- end do
-
- ! Symmetry BC at the end
- elseif (bc_z%end == -2) then
-
- do j = 1, buff_size
-
- ! Density or the partial densities and the momentum
- ! components in x- and y-directions
- do i = 1, mom_idx%beg + 1
- q_cons_vf(i)%sf(:, :, p + j) = &
- q_cons_vf(i)%sf(:, :, (p + 1) - j)
- end do
-
- ! z-component of momentum
- q_cons_vf(mom_idx%end)%sf(:, :, p + j) = &
- -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j)
-
- ! Energy and advection equation(s) variable(s)
- do i = E_idx, sys_size
- q_cons_vf(i)%sf(:, :, p + j) = &
- q_cons_vf(i)%sf(:, :, (p + 1) - j)
- end do
-
- end do
-
- ! Perodic BC at the end
- elseif (bc_z%end == -1) then
-
- do j = 1, buff_size
- do i = 1, sys_size
- q_cons_vf(i)%sf(:, :, p + j) = &
- q_cons_vf(i)%sf(:, :, j - 1)
- end do
- end do
-
- ! Processor BC at the end
- else
-
- call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, &
- 'end', 'z')
-
- end if
-
- end if
-
- end if
-
- ! END: Populating Buffer Regions in the z-direction ================
-
- end subroutine s_populate_conservative_variables_buffer_regions
-
!> Computation of parameters, allocation procedures, and/or
!! any other tasks needed to properly setup the module
subroutine s_initialize_data_input_module
diff --git a/src/post_process/m_derived_variables.fpp b/src/post_process/m_derived_variables.fpp
index e08973bd21..8f9bb01922 100644
--- a/src/post_process/m_derived_variables.fpp
+++ b/src/post_process/m_derived_variables.fpp
@@ -410,6 +410,7 @@ contains
- fd_coeff_z(r, l)* &
q_prim_vf(mom_idx%beg + 1)%sf(j, k, r + l))
else
+
q_sf(j, k, l) = &
q_sf(j, k, l) + fd_coeff_y(r, k)* &
q_prim_vf(mom_idx%end)%sf(j, r + k, l) &
diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp
index b150f23e7d..7730c7c073 100644
--- a/src/post_process/m_global_parameters.fpp
+++ b/src/post_process/m_global_parameters.fpp
@@ -20,6 +20,8 @@ module m_global_parameters
use m_thermochem, only: num_species, species_names
+ use m_global_parameters_common
+
! ==========================================================================
implicit none
@@ -109,6 +111,7 @@ module m_global_parameters
logical :: mixture_err !< Mixture error limiter
logical :: alt_soundspeed !< Alternate sound speed
logical :: hypoelasticity !< Turn hypoelasticity on
+ logical :: rdma_mpi !< Turn on RDMA for MPI
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
!> @}
@@ -144,14 +147,20 @@ module m_global_parameters
!> @name Boundary conditions in the x-, y- and z-coordinate directions
!> @{
type(int_bounds_info) :: bc_x, bc_y, bc_z
+ integer :: num_bc_patches
+ type(bc_patch_parameters) :: patch_bc(num_bc_patches_max)
+ type(bounds_info) :: x_domain, y_domain, z_domain
!> @}
logical :: parallel_io !< Format of the data files
logical :: file_per_process !< output format
- integer, allocatable, dimension(:) :: proc_coords !<
+ integer, dimension(1:3) :: proc_coords !<
!! Processor coordinates in MPI_CART_COMM
+ integer, dimension(1:3) :: proc_nums !<
+ !! Processor dimensions in MPI_CART_COMM
+
integer, allocatable, dimension(:) :: start_idx !<
!! Starting cell-center index of local processor in global grid
@@ -331,18 +340,19 @@ contains
relax = .false.
relax_model = dflt_int
hypoelasticity = .false.
+ rdma_mpi = .false.
bc_x%beg = dflt_int; bc_x%end = dflt_int
bc_y%beg = dflt_int; bc_y%end = dflt_int
bc_z%beg = dflt_int; bc_z%end = dflt_int
#:for DIM in ['x', 'y', 'z']
- #:for DIR in [1, 2, 3]
- bc_${DIM}$%vb${DIR}$ = 0d0
- bc_${DIM}$%ve${DIR}$ = 0d0
- #:endfor
+ bc_${DIM}$%vel_beg = 0d0
+ bc_${DIM}$%vel_end = 0d0
#:endfor
+ call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc)
+
! Fluids physical parameters
do i = 1, num_fluids_max
fluid_pp(i)%gamma = dflt_real
@@ -772,8 +782,6 @@ contains
num_dims = 1 + min(1, n) + min(1, p)
- allocate (proc_coords(1:num_dims))
-
if (parallel_io .neqv. .true.) return
#ifdef MFC_MPI
@@ -818,8 +826,6 @@ contains
end if
- deallocate (proc_coords)
-
deallocate (adv)
#ifdef MFC_MPI
diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp
index 20c5346d21..2bfe9c91d1 100644
--- a/src/post_process/m_mpi_proxy.fpp
+++ b/src/post_process/m_mpi_proxy.fpp
@@ -199,1214 +199,6 @@ contains
end subroutine s_mpi_bcast_user_inputs
- !> This subroutine takes care of efficiently distributing
- !! the computational domain among the available processors
- !! as well as recomputing some of the global parameters so
- !! that they reflect the configuration of sub-domain that
- !! is overseen by the local processor.
- subroutine s_mpi_decompose_computational_domain
-
-#ifdef MFC_MPI
-
- ! # of processors in the x-, y- and z-coordinate directions
- integer :: num_procs_x, num_procs_y, num_procs_z
-
- ! Temporary # of processors in x-, y- and z-coordinate directions
- ! used during the processor factorization optimization procedure
- real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z
-
- ! Processor factorization (fct) minimization parameter
- real(kind(0d0)) :: fct_min
-
- ! Cartesian processor topology communicator
- integer :: MPI_COMM_CART
-
- ! Number of remaining cells for a particular coordinate direction
- ! after the bulk has evenly been distributed among the available
- ! processors for that coordinate direction
- integer :: rem_cells
-
- ! Generic loop iterators
- integer :: i, j
-
- if (num_procs == 1 .and. parallel_io) then
- do i = 1, num_dims
- start_idx(i) = 0
- end do
- return
- end if
-
- ! Performing the computational domain decomposition. The procedure
- ! is optimized by ensuring that each processor contains a close to
- ! equivalent piece of the computational domain. Note that explicit
- ! type-casting is omitted here for code legibility purposes.
-
- ! Generating 3D Cartesian Processor Topology =======================
-
- if (n > 0) then
-
- if (p > 0) then
-
- if (cyl_coord .and. p > 0) then
- ! Implement pencil processor blocking if using cylindrical coordinates so
- ! that all cells in azimuthal direction are stored on a single processor.
- ! This is necessary for efficient application of Fourier filter near axis.
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = num_procs
- num_procs_z = 1
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- else
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = 1
- num_procs_z = num_procs
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + 10d0*abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- do j = 1, (num_procs/i)
-
- if (mod(num_procs/i, j) == 0 &
- .and. &
- (n + 1)/j >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = j
- tmp_num_procs_z = num_procs/(i*j)
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z) &
- .and. &
- (p + 1)/tmp_num_procs_z &
- >= &
- num_stcls_min*weno_order) &
- then
-
- num_procs_x = i
- num_procs_y = j
- num_procs_z = num_procs/(i*j)
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- end if
-
- end do
-
- end if
-
- ! Checking whether the decomposition of the computational
- ! domain was successful
- if (proc_rank == 0 .and. ierr == -1) then
- print '(A)', 'Unable to decompose computational '// &
- 'domain for selected number of '// &
- 'processors. Exiting ...'
- call MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
- end if
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, &
- num_procs_y, num_procs_z/), &
- (/.true., .true., .true./), &
- .false., MPI_COMM_CART, ierr)
-
- ! Finding corresponding Cartesian coordinates of the local
- ! processor rank in newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, &
- proc_coords, ierr)
-
- ! END: Generating 3D Cartesian Processor Topology ==================
-
- ! Sub-domain Global Parameters in z-direction ======================
-
- ! Number of remaining cells after majority is distributed
- rem_cells = mod(p + 1, num_procs_z)
-
- ! Optimal number of cells per processor
- p = (p + 1)/num_procs_z - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(3) == i - 1) then
- p = p + 1
- exit
- end if
- end do
-
- ! Boundary condition at the beginning
- if (proc_coords(3) > 0 .or. bc_z%beg == -1) then
- proc_coords(3) = proc_coords(3) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_z%beg, ierr)
- proc_coords(3) = proc_coords(3) + 1
- end if
-
- ! Ghost zone at the beginning
- if (proc_coords(3) > 0 .and. format == 1) then
- offset_z%beg = 2
- else
- offset_z%beg = 0
- end if
-
- ! Boundary condition at the end
- if (proc_coords(3) < num_procs_z - 1 .or. bc_z%end == -1) then
- proc_coords(3) = proc_coords(3) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_z%end, ierr)
- proc_coords(3) = proc_coords(3) - 1
- end if
-
- ! Ghost zone at the end
- if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then
- offset_z%end = 2
- else
- offset_z%end = 0
- end if
-
- if (parallel_io) then
- if (proc_coords(3) < rem_cells) then
- start_idx(3) = (p + 1)*proc_coords(3)
- else
- start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
- end if
- end if
- ! ==================================================================
-
- ! Generating 2D Cartesian Processor Topology =======================
-
- else
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = num_procs
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- ! Checking whether the decomposition of the computational
- ! domain was successful
- if (proc_rank == 0 .and. ierr == -1) then
- print '(A)', 'Unable to decompose computational '// &
- 'domain for selected number of '// &
- 'processors. Exiting ...'
- call MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
- end if
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, &
- num_procs_y/), (/.true., &
- .true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding corresponding Cartesian coordinates of the local
- ! processor rank in newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, &
- proc_coords, ierr)
-
- end if
-
- ! END: Generating 2D Cartesian Processor Topology ==================
-
- ! Sub-domain Global Parameters in y-direction ======================
-
- ! Number of remaining cells after majority has been distributed
- rem_cells = mod(n + 1, num_procs_y)
-
- ! Optimal number of cells per processor
- n = (n + 1)/num_procs_y - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(2) == i - 1) then
- n = n + 1
- exit
- end if
- end do
-
- ! Boundary condition at the beginning
- if (proc_coords(2) > 0 .or. bc_y%beg == -1) then
- proc_coords(2) = proc_coords(2) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_y%beg, ierr)
- proc_coords(2) = proc_coords(2) + 1
- end if
-
- ! Ghost zone at the beginning
- if (proc_coords(2) > 0 .and. format == 1) then
- offset_y%beg = 2
- else
- offset_y%beg = 0
- end if
-
- ! Boundary condition at the end
- if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == -1) then
- proc_coords(2) = proc_coords(2) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_y%end, ierr)
- proc_coords(2) = proc_coords(2) - 1
- end if
-
- ! Ghost zone at the end
- if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then
- offset_y%end = 2
- else
- offset_y%end = 0
- end if
-
- if (parallel_io) then
- if (proc_coords(2) < rem_cells) then
- start_idx(2) = (n + 1)*proc_coords(2)
- else
- start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
- end if
- end if
- ! ==================================================================
-
- ! Generating 1D Cartesian Processor Topology =======================
-
- else
-
- ! Number of processors in the coordinate direction is equal to
- ! the total number of processors available
- num_procs_x = num_procs
-
- ! Number of cells in undecomposed computational domain needed
- ! for sub-domain reassembly during formatted data output
- m_root = m
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), &
- (/.true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding the corresponding Cartesian coordinates of the local
- ! processor rank in the newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, &
- proc_coords, ierr)
-
- end if
-
- ! ==================================================================
-
- ! Sub-domain Global Parameters in x-direction ======================
-
- ! Number of remaining cells after majority has been distributed
- rem_cells = mod(m + 1, num_procs_x)
-
- ! Optimal number of cells per processor
- m = (m + 1)/num_procs_x - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(1) == i - 1) then
- m = m + 1
- exit
- end if
- end do
-
- ! Boundary condition at the beginning
- if (proc_coords(1) > 0 .or. bc_x%beg == -1) then
- proc_coords(1) = proc_coords(1) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr)
- proc_coords(1) = proc_coords(1) + 1
- end if
-
- ! Ghost zone at the beginning
- if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then
- offset_x%beg = 2
- else
- offset_x%beg = 0
- end if
-
- ! Boundary condition at the end
- if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then
- proc_coords(1) = proc_coords(1) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr)
- proc_coords(1) = proc_coords(1) - 1
- end if
-
- ! Ghost zone at the end
- if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then
- offset_x%end = 2
- else
- offset_x%end = 0
- end if
-
- if (parallel_io) then
- if (proc_coords(1) < rem_cells) then
- start_idx(1) = (m + 1)*proc_coords(1)
- else
- start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
- end if
- end if
- ! ==================================================================
-
-#endif
-
- end subroutine s_mpi_decompose_computational_domain
-
- !> Communicates the buffer regions associated with the grid
- !! variables with processors in charge of the neighboring
- !! sub-domains. Note that only cell-width spacings feature
- !! buffer regions so that no information relating to the
- !! cell-boundary locations is communicated.
- !! @param pbc_loc Processor boundary condition (PBC) location
- !! @param sweep_coord Coordinate direction normal to the processor boundary
- subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord)
-
- character(LEN=3), intent(in) :: pbc_loc
- character, intent(in) :: sweep_coord
-
-#ifdef MFC_MPI
-
- ! Communications in the x-direction ================================
-
- if (sweep_coord == 'x') then
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_x%end >= 0) then
-
- ! Sending/receiving the data to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- dx(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_x%beg/bc_x%beg
- call MPI_SENDRECV(dx(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- dx(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_x%beg >= 0) then
-
- ! Sending/receiving the data to/from bc_x%beg/bc_x%end
- call MPI_SENDRECV(dx(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- dx(m + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_x%end/bc_x%end
- call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- dx(m + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- end if
-
- ! END: Communications in the x-direction ===========================
-
- ! Communications in the y-direction ================================
-
- elseif (sweep_coord == 'y') then
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_y%end >= 0) then
-
- ! Sending/receiving the data to/from bc_y%end/bc_y%beg
- call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 0, &
- dy(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_y%beg/bc_y%beg
- call MPI_SENDRECV(dy(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 1, &
- dy(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_y%beg >= 0) then
-
- ! Sending/receiving the data to/from bc_y%beg/bc_y%end
- call MPI_SENDRECV(dy(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 1, &
- dy(n + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_y%end/bc_y%end
- call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 0, &
- dy(n + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- end if
-
- ! END: Communications in the y-direction ===========================
-
- ! Communications in the z-direction ================================
-
- else
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_z%end >= 0) then
-
- ! Sending/receiving the data to/from bc_z%end/bc_z%beg
- call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- dz(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_z%beg/bc_z%beg
- call MPI_SENDRECV(dz(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- dz(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_z%beg >= 0) then
-
- ! Sending/receiving the data to/from bc_z%beg/bc_z%end
- call MPI_SENDRECV(dz(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- dz(p + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Sending/receiving the data to/from bc_z%end/bc_z%end
- call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- dz(p + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- end if
-
- end if
-
- ! END: Communications in the z-direction ===========================
-
-#endif
-
- end subroutine s_mpi_sendrecv_grid_vars_buffer_regions
-
- !> Communicates buffer regions associated with conservative
- !! variables with processors in charge of the neighboring
- !! sub-domains
- !! @param q_cons_vf Conservative variables
- !! @param pbc_loc Processor boundary condition (PBC) location
- !! @param sweep_coord Coordinate direction normal to the processor boundary
- subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, &
- sweep_coord)
-
- type(scalar_field), &
- dimension(sys_size), &
- intent(inout) :: q_cons_vf
-
- character(LEN=3), intent(in) :: pbc_loc
-
- character, intent(in) :: sweep_coord
-
-#ifdef MFC_MPI
-
- integer :: i, j, k, l, r !< Generic loop iterators
-
- ! Communications in the x-direction ================================
-
- if (sweep_coord == 'x') then
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_x%end >= 0) then
-
- ! Packing the data to be sent to bc_x%end
- do l = 0, p
- do k = 0, n
- do j = m - buff_size + 1, m
- do i = 1, sys_size
- r = sys_size*(j - m + buff_size - 1) &
- + sys_size*buff_size*k + (i - 1) &
- + sys_size*buff_size*(n + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- q_cons_buffer_in(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Packing the data to be sent to bc_x%beg
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = 1, sys_size
- r = (i - 1) + sys_size*j &
- + sys_size*buff_size*k &
- + sys_size*buff_size*(n + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_x%beg/bc_x%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- q_cons_buffer_in(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data received from bc_x%beg
- do l = 0, p
- do k = 0, n
- do j = -buff_size, -1
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*buff_size*k + (i - 1) &
- + sys_size*buff_size*(n + 1)*l
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_x%beg >= 0) then
-
- ! Packing the data to be sent to bc_x%beg
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = 1, sys_size
- r = (i - 1) + sys_size*j &
- + sys_size*buff_size*k &
- + sys_size*buff_size*(n + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_x%beg/bc_x%end
- call MPI_SENDRECV(q_cons_buffer_out(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- q_cons_buffer_in(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Packing the data to be sent to bc_x%end
- do l = 0, p
- do k = 0, n
- do j = m - buff_size + 1, m
- do i = 1, sys_size
- r = sys_size*(j - m + buff_size - 1) &
- + sys_size*buff_size*k + (i - 1) &
- + sys_size*buff_size*(n + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_x%end/bc_x%end
- call MPI_SENDRECV(q_cons_buffer_out(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- q_cons_buffer_in(0), &
- buff_size*sys_size*(n + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data received from bc_x%end
- do l = 0, p
- do k = 0, n
- do j = m + 1, m + buff_size
- do i = 1, sys_size
- r = (i - 1) + sys_size*(j - m - 1) &
- + sys_size*buff_size*k &
- + sys_size*buff_size*(n + 1)*l
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- end if
-
- ! END: Communications in the x-direction ===========================
-
- ! Communications in the y-direction ================================
-
- elseif (sweep_coord == 'y') then
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_y%end >= 0) then
-
- ! Packing the data to be sent to bc_y%end
- do l = 0, p
- do k = n - buff_size + 1, n
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k - n + buff_size - 1) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- buff_size*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_y%end/bc_y%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (p + 1), MPI_DOUBLE_PRECISION, &
- bc_y%end, 0, q_cons_buffer_in(0), &
- buff_size*sys_size* &
- (m + 2*buff_size + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Packing the data to be sent to bc_y%beg
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)*k &
- + sys_size*(m + 2*buff_size + 1)* &
- buff_size*l + (i - 1)
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_y%beg/bc_y%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (p + 1), MPI_DOUBLE_PRECISION, &
- bc_y%beg, 1, q_cons_buffer_in(0), &
- buff_size*sys_size* &
- (m + 2*buff_size + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data received from bc_y%beg
- do l = 0, p
- do k = -buff_size, -1
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = (i - 1) + sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + sys_size* &
- (m + 2*buff_size + 1)*buff_size*l
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_y%beg >= 0) then
-
- ! Packing the data to be sent to bc_y%beg
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)*k &
- + sys_size*(m + 2*buff_size + 1)* &
- buff_size*l + (i - 1)
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_y%beg/bc_y%end
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (p + 1), MPI_DOUBLE_PRECISION, &
- bc_y%beg, 1, q_cons_buffer_in(0), &
- buff_size*sys_size* &
- (m + 2*buff_size + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Packing the data to be sent to bc_y%end
- do l = 0, p
- do k = n - buff_size + 1, n
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k - n + buff_size - 1) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- buff_size*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_y%end/bc_y%end
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (p + 1), MPI_DOUBLE_PRECISION, &
- bc_y%end, 0, q_cons_buffer_in(0), &
- buff_size*sys_size* &
- (m + 2*buff_size + 1)*(p + 1), &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data received form bc_y%end
- do l = 0, p
- do k = n + 1, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = (i - 1) + sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k - n - 1) + sys_size* &
- (m + 2*buff_size + 1)*buff_size*l
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- end if
-
- ! END: Communications in the y-direction ===========================
-
- ! Communications in the z-direction ================================
-
- else
-
- if (pbc_loc == 'beg') then ! Buffer region at the beginning
-
- ! PBC at both ends of the sub-domain
- if (bc_z%end >= 0) then
-
- ! Packing the data to be sent to bc_z%end
- do l = p - buff_size + 1, p
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + sys_size* &
- (m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)* &
- (l - p + buff_size - 1) + (i - 1)
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_z%end/bc_z%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- q_cons_buffer_in(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at beginning of the sub-domain
- else
-
- ! Packing the data to be sent to bc_z%beg
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_z%beg/bc_z%beg
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- q_cons_buffer_in(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data from bc_z%beg
- do l = -buff_size, -1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)*(l + buff_size)
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- else ! Buffer region at the end
-
- ! PBC at both ends of the sub-domain
- if (bc_z%beg >= 0) then
-
- ! Packing the data to be sent to bc_z%beg
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)*l
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_z%beg/bc_z%end
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- q_cons_buffer_in(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- ! PBC only at end of the sub-domain
- else
-
- ! Packing the data to be sent to bc_z%end
- do l = p - buff_size + 1, p
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + sys_size* &
- (m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)* &
- (l - p + buff_size - 1) + (i - 1)
- q_cons_buffer_out(r) = &
- q_cons_vf(i)%sf(j, k, l)
- end do
- end do
- end do
- end do
-
- ! Sending/receiving the data to/from bc_z%end/bc_z%end
- call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- q_cons_buffer_in(0), buff_size* &
- sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1), &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, &
- ierr)
-
- end if
-
- ! Unpacking the data received from bc_z%end
- do l = p + 1, p + buff_size
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do i = 1, sys_size
- r = sys_size*(j + buff_size) &
- + sys_size*(m + 2*buff_size + 1)* &
- (k + buff_size) + (i - 1) &
- + sys_size*(m + 2*buff_size + 1)* &
- (n + 2*buff_size + 1)*(l - p - 1)
- q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- end if
-
- end if
-
- ! END: Communications in the z-direction ===========================
-
-#endif
-
- end subroutine s_mpi_sendrecv_cons_vars_buffer_regions
-
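
For reference, the packing and unpacking loops removed above flatten the four indices (i, j, k, l) into the single buffer offset r, and the MPI_SENDRECV counts are sized so that r spans exactly buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1) entries for a y-direction exchange. The following self-contained sketch reproduces that arithmetic with made-up sizes; it is illustrative only and not part of the patch.

! Illustrative sketch, not part of the patch: the flattened offset r used when
! unpacking q_cons_buffer_in for the bc_y%end buffer region above.
program buffer_index_sketch
    implicit none
    integer, parameter :: sys_size = 5, buff_size = 2, m = 10, n = 8, p = 6
    integer :: i, j, k, l, r, r_max

    ! One representative ghost cell: equation i at (j, k, l), with k in the
    ! y-end buffer layers n+1 .. n+buff_size.
    i = 3; j = -buff_size; k = n + 1; l = 0

    r = (i - 1) + sys_size*(j + buff_size) &
        + sys_size*(m + 2*buff_size + 1)*(k - n - 1) &
        + sys_size*(m + 2*buff_size + 1)*buff_size*l

    ! Largest possible offset, one less than the MPI_SENDRECV count.
    r_max = buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1) - 1

    print *, 'offset r =', r, 'of 0 ..', r_max
end program buffer_index_sketch
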
!> This subroutine gathers the Silo database metadata for
!! the spatial extents in order to boost the performance of
!! the multidimensional visualization.
diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90
index 301e385d33..97e7062dc7 100644
--- a/src/post_process/m_start_up.f90
+++ b/src/post_process/m_start_up.f90
@@ -39,6 +39,8 @@ module m_start_up
use m_finite_differences
+ use m_boundary_conditions_common
+
! ==========================================================================
implicit none
@@ -66,9 +68,12 @@ subroutine s_read_input_file
namelist /user_inputs/ case_dir, m, n, p, t_step_start, &
t_step_stop, t_step_save, model_eqns, &
num_fluids, mpp_lim, &
- weno_order, bc_x, &
- bc_y, bc_z, fluid_pp, format, precision, &
- hypoelasticity, G, &
+ weno_order, &
+ bc_x, bc_y, bc_z, &
+ num_bc_patches, patch_bc, &
+ fluid_pp, format, precision, &
+ x_domain, y_domain, z_domain, &
+ hypoelasticity, rdma_mpi, G, &
chem_wrt_Y, chem_wrt_T, avg_state, &
alpha_rho_wrt, rho_wrt, mom_wrt, vel_wrt, &
E_wrt, pres_wrt, alpha_wrt, gamma_wrt, &
@@ -170,12 +175,12 @@ subroutine s_perform_time_step(t_step)
call s_read_data_files(t_step)
! Populating the buffer regions of the grid variables
if (buff_size > 0) then
- call s_populate_grid_variables_buffer_regions()
+ call s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs)
end if
! Populating the buffer regions of the conservative variables
if (buff_size > 0) then
- call s_populate_conservative_variables_buffer_regions()
+ call s_populate_cons_buffers(q_cons_vf)
end if
! Converting the conservative variables to the primitive ones
@@ -679,13 +684,17 @@ subroutine s_initialize_modules
! Computation of parameters, allocation procedures, and/or any other tasks
! needed to properly setup the modules
call s_initialize_global_parameters_module()
+ call s_initialize_boundary_conditions_module()
if (bubbles .and. nb > 1) then
call s_simpson
end if
if (bubbles .and. .not. polytropic) then
call s_initialize_nonpoly()
end if
- if (num_procs > 1) call s_initialize_mpi_proxy_module()
+ if (num_procs > 1) then
+ call s_initialize_mpi_common_module()
+ call s_initialize_mpi_proxy_module()
+ end if
call s_initialize_variables_conversion_module()
call s_initialize_data_input_module()
call s_initialize_derived_variables_module()
@@ -733,7 +742,10 @@ subroutine s_finalize_modules
call s_finalize_derived_variables_module()
call s_finalize_data_input_module()
call s_finalize_variables_conversion_module()
- if (num_procs > 1) call s_finalize_mpi_proxy_module()
+ if (num_procs > 1) then
+ call s_finalize_mpi_proxy_module()
+ call s_finalize_mpi_common_module()
+ end if
call s_finalize_global_parameters_module()
! Finalizing the MPI environment
diff --git a/src/pre_process/m_compute_levelset.fpp b/src/pre_process/m_compute_levelset.fpp
index 18faf7a469..499a88cc97 100644
--- a/src/pre_process/m_compute_levelset.fpp
+++ b/src/pre_process/m_compute_levelset.fpp
@@ -315,23 +315,25 @@ contains
if (side_dists(2) == 0) then
levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0
else
- levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(2)/ &
+ levelset_norm%sf(i, j, 0, ib_patch_id, 1) = -side_dists(2)/ &
abs(side_dists(2))
end if
else if (min_dist == abs(side_dists(3))) then
+ levelset%sf(i, j, 0, ib_patch_id) = side_dists(3)
if (side_dists(3) == 0) then
- levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0
+ levelset_norm%sf(i, j, 0, ib_patch_id, 2) = 0d0
else
- levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(3)/ &
+ levelset_norm%sf(i, j, 0, ib_patch_id, 2) = side_dists(3)/ &
abs(side_dists(3))
end if
else if (min_dist == abs(side_dists(4))) then
+ levelset%sf(i, j, 0, ib_patch_id) = side_dists(4)
if (side_dists(4) == 0) then
- levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0
+ levelset_norm%sf(i, j, 0, ib_patch_id, 2) = 0d0
else
- levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(4)/ &
+ levelset_norm%sf(i, j, 0, ib_patch_id, 2) = -side_dists(4)/ &
abs(side_dists(4))
end if
diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp
index 8c6147adef..541b49eba5 100644
--- a/src/pre_process/m_data_output.fpp
+++ b/src/pre_process/m_data_output.fpp
@@ -32,6 +32,8 @@ module m_data_output
use m_thermochem, only: species_names
+ use m_boundary_conditions_common
+
! ==========================================================================
implicit none
@@ -124,6 +126,8 @@ contains
integer :: i, j, k, l, r, c, dir !< Generic loop iterator
integer :: t_step
+ integer :: iter_dir, iter_loc
+
real(kind(0d0)), dimension(nb) :: nRtmp !< Temporary bubble concentration
real(kind(0d0)) :: nbub !< Temporary bubble number density
real(kind(0d0)) :: gamma, lit_gamma, pi_inf, qv !< Temporary EOS params
@@ -145,6 +149,9 @@ contains
status = 'new'
end if
+ ! Boundary Conditions
+ call s_write_boundary_condition_files(t_step_dir)
+
! x-coordinate direction
file_loc = trim(t_step_dir)//'/x_cb.dat'
open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status)
@@ -561,6 +568,8 @@ contains
! Generic loop iterator
integer :: i
+ call s_write_boundary_condition_files(trim(case_dir)//'/restart_data')
+
if (file_per_process) then
if (proc_rank == 0) then
file_loc = trim(case_dir)//'/restart_data/lustre_0'
diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp
index ef240f21d2..a4ae6b801b 100644
--- a/src/pre_process/m_global_parameters.fpp
+++ b/src/pre_process/m_global_parameters.fpp
@@ -20,6 +20,8 @@ module m_global_parameters
use m_thermochem, only: num_species
+ use m_global_parameters_common
+
! ==========================================================================
implicit none
@@ -92,6 +94,8 @@ module m_global_parameters
logical :: hypoelasticity !< activate hypoelasticity
logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling
+ logical :: rdma_mpi !< Use RDMA for MPI communication
+
! Annotations of the structure, i.e. the organization, of the state vectors
type(int_bounds_info) :: cont_idx !< Indexes of first & last continuity eqns.
type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns.
@@ -118,6 +122,8 @@ module m_global_parameters
type(int_bounds_info) :: idwbuff(1:3)
type(int_bounds_info) :: bc_x, bc_y, bc_z !<
+ integer :: num_bc_patches
+ type(bc_patch_parameters) :: patch_bc(num_bc_patches_max)
!! Boundary conditions in the x-, y- and z-coordinate directions
logical :: parallel_io !< Format of the data files
@@ -139,9 +145,12 @@ module m_global_parameters
integer :: perturb_sph_fluid !< Fluid to be perturbed with perturb_sph flag
real(kind(0d0)), dimension(num_fluids_max) :: fluid_rho
- integer, allocatable, dimension(:) :: proc_coords !<
+ integer, dimension(1:3) :: proc_coords !<
!! Processor coordinates in MPI_CART_COMM
+ integer, dimension(1:3) :: proc_nums
+ !! Processor dimensions in MPI_CART_COMM
+
integer, allocatable, dimension(:) :: start_idx !<
!! Starting cell-center index of local processor in global grid
@@ -309,16 +318,15 @@ contains
weno_order = dflt_int
hypoelasticity = .false.
+ rdma_mpi = .false.
bc_x%beg = dflt_int; bc_x%end = dflt_int
bc_y%beg = dflt_int; bc_y%end = dflt_int
bc_z%beg = dflt_int; bc_z%end = dflt_int
#:for DIM in ['x', 'y', 'z']
- #:for DIR in [1, 2, 3]
- bc_${DIM}$%vb${DIR}$ = 0d0
- bc_${DIM}$%ve${DIR}$ = 0d0
- #:endfor
+ bc_${DIM}$%vel_beg = 0d0
+ bc_${DIM}$%vel_end = 0d0
#:endfor
parallel_io = .false.
@@ -386,6 +394,8 @@ contains
end if
end do
+ call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc)
+
! Tait EOS
rhoref = dflt_real
pref = dflt_real
@@ -793,8 +803,6 @@ contains
num_dims = 1 + min(1, n) + min(1, p)
- allocate (proc_coords(1:num_dims))
-
if (parallel_io .neqv. .true.) return
#ifdef MFC_MPI
@@ -830,8 +838,6 @@ contains
end if
end if
- deallocate (proc_coords)
-
#ifdef MFC_MPI
if (parallel_io) then
diff --git a/src/pre_process/m_initial_condition.fpp b/src/pre_process/m_initial_condition.fpp
index dc51829069..13848be7e5 100644
--- a/src/pre_process/m_initial_condition.fpp
+++ b/src/pre_process/m_initial_condition.fpp
@@ -142,7 +142,7 @@ contains
do i = 1, num_patches
if (proc_rank == 0) then
- print *, 'Processing patch', i
+ print *, 'Processing 3D patch', i
end if
!> ICPP Patches
@@ -218,7 +218,7 @@ contains
do i = 1, num_patches
if (proc_rank == 0) then
- print *, 'Processing patch', i
+ print *, 'Processing 2D patch', i
end if
!> ICPP Patches
@@ -296,7 +296,7 @@ contains
do i = 1, num_patches
if (proc_rank == 0) then
- print *, 'Processing patch', i
+ print *, 'Processing 1D patch', i
end if
! Line segment patch
diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp
index 33895c7c4c..b84610bd9e 100644
--- a/src/pre_process/m_mpi_proxy.fpp
+++ b/src/pre_process/m_mpi_proxy.fpp
@@ -72,6 +72,25 @@ contains
call MPI_BCAST(${VAR}$, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
#:endfor
+ #:for VAR in ['x', 'y', 'z']
+ call MPI_BCAST(bc_${VAR}$%vel_beg, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+ call MPI_BCAST(bc_${VAR}$%vel_end, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+ #:endfor
+
+ do i = 1, num_bc_patches_max
+ #:for VAR in [ 'type', 'dir', 'loc', 'geometry' ]
+ call MPI_BCAST(patch_bc(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+ #:endfor
+
+ #:for VAR in [ 'centroid', 'length', 'vel' ]
+ call MPI_BCAST(patch_bc(i)%${VAR}$, size(patch_bc(i)%${VAR}$), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+ #:endfor
+
+ #:for VAR in [ 'radius' ]
+ call MPI_BCAST(patch_bc(i)%${VAR}$, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+ #:endfor
+ end do
+
do i = 1, num_patches_max
#:for VAR in [ 'geometry', 'smooth_patch_id']
call MPI_BCAST(patch_icpp(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
@@ -116,421 +135,4 @@ contains
end subroutine s_mpi_bcast_user_inputs
- !> Description: This subroutine takes care of efficiently distributing
- !! the computational domain among the available processors
- !! as well as recomputing some of the global parameters so
- !! that they reflect the configuration of the sub-domain that is
- !! overseen by the local processor.
- subroutine s_mpi_decompose_computational_domain
-
-#ifdef MFC_MPI
-
- ! # of processors in the x-, y- and z-coordinate directions
- integer :: num_procs_x, num_procs_y, num_procs_z
-
- ! Temporary # of processors in x-, y- and z-coordinate directions
- ! used during the processor factorization optimization procedure
- real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z
-
- ! Processor factorization (fct) minimization parameter
- real(kind(0d0)) :: fct_min
-
- ! Cartesian processor topology communicator
- integer :: MPI_COMM_CART
-
- ! Number of remaining cells for a particular coordinate direction
- ! after the bulk has evenly been distributed among the available
- ! processors for that coordinate direction
- integer :: rem_cells
-
- ! Generic loop iterators
- integer :: i, j
-
- if (num_procs == 1 .and. parallel_io) then
- do i = 1, num_dims
- start_idx(i) = 0
- end do
- return
- end if
-
- ! Performing the computational domain decomposition. The procedure
- ! is optimized by ensuring that each processor contains a close to
- ! equivalent piece of the computational domain. Note that explicit
- ! type-casting is omitted here for code legibility purposes.
-
- ! Generating 3D Cartesian Processor Topology =======================
-
- if (n > 0) then
-
- if (p > 0) then
-
- if (cyl_coord .and. p > 0) then
- ! Implement pencil processor blocking if using cylindrical coordinates so
- ! that all cells in the azimuthal direction are stored on a single processor.
- ! This is necessary for efficient application of the Fourier filter near the axis.
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = num_procs
- num_procs_z = 1
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- else
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = 1
- num_procs_z = num_procs
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + 10d0*abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- do j = 1, (num_procs/i)
-
- if (mod(num_procs/i, j) == 0 &
- .and. &
- (n + 1)/j >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = j
- tmp_num_procs_z = num_procs/(i*j)
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z) &
- .and. &
- (p + 1)/tmp_num_procs_z &
- >= &
- num_stcls_min*weno_order) &
- then
-
- num_procs_x = i
- num_procs_y = j
- num_procs_z = num_procs/(i*j)
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- end if
-
- end do
-
- end if
-
- ! Checking whether the decomposition of the computational
- ! domain was successful
- if (proc_rank == 0 .and. ierr == -1) then
- print '(A)', 'Unable to decompose computational '// &
- 'domain for selected number of '// &
- 'processors. Exiting ...'
- call MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
- end if
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, &
- num_procs_y, num_procs_z/), &
- (/.true., .true., .true./), &
- .false., MPI_COMM_CART, ierr)
-
- ! Finding corresponding Cartesian coordinates of the local
- ! processor rank in newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, &
- proc_coords, ierr)
-
- ! END: Generating 3D Cartesian Processor Topology ==================
-
- ! Sub-domain Global Parameters in z-direction ======================
-
- ! Number of remaining cells after majority is distributed
- rem_cells = mod(p + 1, num_procs_z)
-
- ! Preliminary uniform cell-width spacing
- if (old_grid .neqv. .true.) then
- dz = (z_domain%end - z_domain%beg)/real(p + 1, kind(0d0))
- end if
-
- ! Optimal number of cells per processor
- p = (p + 1)/num_procs_z - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(3) == i - 1) then
- p = p + 1
- exit
- end if
- end do
-
- ! Beginning and end sub-domain boundary locations
- if (parallel_io .neqv. .true.) then
- if (old_grid .neqv. .true.) then
- if (proc_coords(3) < rem_cells) then
- z_domain%beg = z_domain%beg + dz*real((p + 1)* &
- proc_coords(3))
- z_domain%end = z_domain%end - dz*real((p + 1)* &
- (num_procs_z - proc_coords(3) - 1) &
- - (num_procs_z - rem_cells))
- else
- z_domain%beg = z_domain%beg + dz*real((p + 1)* &
- proc_coords(3) + rem_cells)
- z_domain%end = z_domain%end - dz*real((p + 1)* &
- (num_procs_z - proc_coords(3) - 1))
- end if
- end if
- else
- if (proc_coords(3) < rem_cells) then
- start_idx(3) = (p + 1)*proc_coords(3)
- else
- start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
- end if
- end if
-
- ! ==================================================================
-
- ! Generating 2D Cartesian Processor Topology =======================
-
- else
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = num_procs
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
-
- end if
-
- end do
-
- ! Checking whether the decomposition of the computational
- ! domain was successful
- if (proc_rank == 0 .and. ierr == -1) then
- print '(A)', 'Unable to decompose computational '// &
- 'domain for selected number of '// &
- 'processors. Exiting ...'
- call MPI_ABORT(MPI_COMM_WORLD, 1, ierr)
- end if
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, &
- num_procs_y/), (/.true., &
- .true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding corresponding Cartesian coordinates of the local
- ! processor rank in newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, &
- proc_coords, ierr)
-
- end if
-
- ! END: Generating 2D Cartesian Processor Topology ==================
-
- ! Sub-domain Global Parameters in y-direction ======================
-
- ! Number of remaining cells after majority has been distributed
- rem_cells = mod(n + 1, num_procs_y)
-
- ! Preliminary uniform cell-width spacing
- if (old_grid .neqv. .true.) then
- dy = (y_domain%end - y_domain%beg)/real(n + 1, kind(0d0))
- end if
-
- ! Optimal number of cells per processor
- n = (n + 1)/num_procs_y - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(2) == i - 1) then
- n = n + 1
- exit
- end if
- end do
-
- ! Beginning and end sub-domain boundary locations
- if (parallel_io .neqv. .true.) then
- if (old_grid .neqv. .true.) then
- if (proc_coords(2) < rem_cells) then
- y_domain%beg = y_domain%beg + dy*real((n + 1)* &
- proc_coords(2))
- y_domain%end = y_domain%end - dy*real((n + 1)* &
- (num_procs_y - proc_coords(2) - 1) &
- - (num_procs_y - rem_cells))
- else
- y_domain%beg = y_domain%beg + dy*real((n + 1)* &
- proc_coords(2) + rem_cells)
- y_domain%end = y_domain%end - dy*real((n + 1)* &
- (num_procs_y - proc_coords(2) - 1))
- end if
- end if
- else
- if (proc_coords(2) < rem_cells) then
- start_idx(2) = (n + 1)*proc_coords(2)
- else
- start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
- end if
- end if
-
- ! ==================================================================
-
- ! Generating 1D Cartesian Processor Topology =======================
-
- else
-
- ! Number of processors in the coordinate direction is equal to
- ! the total number of processors available
- num_procs_x = num_procs
-
- ! Creating a new communicator using Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), &
- (/.true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding the corresponding Cartesian coordinates of the local
- ! processor rank in the newly declared cartesian communicator
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, &
- proc_coords, ierr)
-
- end if
-
- ! ==================================================================
-
- ! Sub-domain Global Parameters in x-direction ======================
-
- ! Number of remaining cells after majority has been distributed
- rem_cells = mod(m + 1, num_procs_x)
-
- ! Preliminary uniform cell-width spacing
- if (old_grid .neqv. .true.) then
- dx = (x_domain%end - x_domain%beg)/real(m + 1, kind(0d0))
- end if
-
- ! Optimal number of cells per processor
- m = (m + 1)/num_procs_x - 1
-
- ! Distributing any remaining cells
- do i = 1, rem_cells
- if (proc_coords(1) == i - 1) then
- m = m + 1
- exit
- end if
- end do
-
- ! Beginning and end sub-domain boundary locations
- if (parallel_io .neqv. .true.) then
- if (old_grid .neqv. .true.) then
- if (proc_coords(1) < rem_cells) then
- x_domain%beg = x_domain%beg + dx*real((m + 1)* &
- proc_coords(1))
- x_domain%end = x_domain%end - dx*real((m + 1)* &
- (num_procs_x - proc_coords(1) - 1) &
- - (num_procs_x - rem_cells))
- else
- x_domain%beg = x_domain%beg + dx*real((m + 1)* &
- proc_coords(1) + rem_cells)
- x_domain%end = x_domain%end - dx*real((m + 1)* &
- (num_procs_x - proc_coords(1) - 1))
- end if
- end if
- else
- if (proc_coords(1) < rem_cells) then
- start_idx(1) = (m + 1)*proc_coords(1)
- else
- start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
- end if
- end if
-
- ! ==================================================================
-
-#endif
-
- end subroutine s_mpi_decompose_computational_domain
-
end module m_mpi_proxy
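
The removed s_mpi_decompose_computational_domain picks the number of ranks per direction by exhaustively searching the factorizations of num_procs and minimizing the imbalance between per-rank cell counts, while requiring at least num_stcls_min*weno_order cells per rank in every split direction. The kernel of that 2D search is sketched below with made-up values for num_procs, m, n, and the minimum cell count; it is illustrative only and not part of the patch.

! Illustrative sketch, not part of the patch: the 2D factorization search used
! by the removed domain decomposition, balancing (m+1)/px against (n+1)/py.
program decomp_sketch
    implicit none
    integer, parameter :: num_procs = 12, m = 199, n = 99, min_cells = 10
    integer :: i, px, py
    real(kind(0d0)) :: fct, fct_min

    ! Start from a deliberately poor bound, as the original routine does.
    px = 1; py = num_procs
    fct_min = 10d0*abs(real(m + 1, kind(0d0))/px - real(n + 1, kind(0d0))/py)

    do i = 1, num_procs
        if (mod(num_procs, i) == 0 .and. (m + 1)/i >= min_cells) then
            fct = abs(real(m + 1, kind(0d0))/i &
                      - real(n + 1, kind(0d0))/(num_procs/i))
            if (fct <= fct_min .and. (n + 1)/(num_procs/i) >= min_cells) then
                px = i; py = num_procs/i; fct_min = fct
            end if
        end if
    end do

    print *, 'chosen decomposition:', px, 'x', py
end program decomp_sketch
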
diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp
index 3d78432591..6a0738438e 100644
--- a/src/pre_process/m_start_up.fpp
+++ b/src/pre_process/m_start_up.fpp
@@ -50,6 +50,8 @@ module m_start_up
use m_checker_common
use m_checker
+
+ use m_boundary_conditions_common
! ==========================================================================
implicit none
@@ -128,8 +130,8 @@ contains
stretch_x, stretch_y, stretch_z, a_x, a_y, &
a_z, x_a, y_a, z_a, x_b, y_b, z_b, &
model_eqns, num_fluids, mpp_lim, &
- weno_order, bc_x, bc_y, bc_z, num_patches, &
- hypoelasticity, patch_icpp, fluid_pp, precision, parallel_io, &
+ weno_order, bc_x, bc_y, bc_z, patch_bc, num_patches, num_bc_patches, &
+ hypoelasticity, rdma_mpi, patch_icpp, fluid_pp, precision, parallel_io, &
mixlayer_vel_profile, mixlayer_vel_coef, mixlayer_domain, &
mixlayer_perturb, &
pi_fac, perturb_flow, perturb_flow_fluid, perturb_flow_mag, &
@@ -735,6 +737,8 @@ contains
call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting... ')
end if
+ call s_read_boundary_condition_files(restart_dir)
+
if (ib) then
write (file_loc, '(A)') 'ib.dat'
@@ -768,6 +772,8 @@ contains
! Computation of parameters, allocation procedures, and/or any other tasks
! needed to properly setup the modules
call s_initialize_global_parameters_module()
+ call s_initialize_boundary_conditions_module()
+
!Quadrature weights and nodes for polydisperse simulations
if (bubbles .and. nb > 1) then
call s_simpson
@@ -823,6 +829,8 @@ contains
call s_read_grid_data_files()
call s_check_grid_data_files()
end if
+
+ call s_generate_boundary_condition_patch_buffers()
end if
end subroutine s_read_grid
@@ -922,6 +930,7 @@ contains
call s_mpi_bcast_user_inputs()
call s_initialize_parallel_io()
call s_mpi_decompose_computational_domain()
+
end subroutine s_initialize_mpi_domain
subroutine s_finalize_modules
diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp
index 266be8ed00..85a85da31a 100644
--- a/src/simulation/m_boundary_conditions.fpp
+++ b/src/simulation/m_boundary_conditions.fpp
@@ -2,6 +2,8 @@
!! @file m_boundary_conditions.fpp
!! @brief Contains module m_boundary_conditions
+#:include 'macros.fpp'
+
!> @brief The purpose of the module is to apply noncharacteristic and processor
!! boundary conditions
module m_boundary_conditions
@@ -11,1628 +13,49 @@ module m_boundary_conditions
use m_global_parameters !< Definitions of the global parameters
- use m_mpi_proxy
+ use m_mpi_proxy !< Definitions of the MPI proxy
use m_constants
+
+ use m_boundary_conditions_common
+
! ==========================================================================
implicit none
private;
- public :: s_populate_variables_buffers, &
- s_populate_capillary_buffers
+ public :: s_populate_prim_buffers, &
+ s_populate_capillary_buffers, &
+ s_initialize_boundary_conditions_module
contains
- !> The purpose of this procedure is to populate the buffers
- !! of the primitive variables, depending on the selected
- !! boundary conditions.
- subroutine s_populate_variables_buffers(q_prim_vf, pb, mv)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
-
- integer :: bc_loc, bc_dir
-
- ! Population of Buffers in x-direction =============================
-
- select case (bc_x%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1)
- case (-2) ! Symmetry BC at beginning
- call s_symmetry(q_prim_vf, pb, mv, 1, -1)
- case (-1) ! Periodic BC at beginning
- call s_periodic(q_prim_vf, pb, mv, 1, -1)
- case (-15) ! Slip wall BC at beginning
- call s_slip_wall(q_prim_vf, pb, mv, 1, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1)
- case default ! Processor BC at beginning
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 1, -1)
- end select
-
- select case (bc_x%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1)
- case (-2) ! Symmetry BC at end
- call s_symmetry(q_prim_vf, pb, mv, 1, 1)
- case (-1) ! Periodic BC at end
- call s_periodic(q_prim_vf, pb, mv, 1, 1)
- case (-15) ! Slip wall BC at end
- call s_slip_wall(q_prim_vf, pb, mv, 1, 1)
- case (-16) ! No-slip wall bc at end
- call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1)
- case default ! Processor BC at end
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 1, 1)
- end select
-
- if (qbmm .and. .not. polytropic) then
- select case (bc_x%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_qbmm_extrapolation(pb, mv, 1, -1)
- case (-15) ! Slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 1, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 1, -1)
- end select
-
- select case (bc_x%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_qbmm_extrapolation(pb, mv, 1, 1)
- case (-15) ! Slip wall BC at end
- call s_qbmm_extrapolation(pb, mv, 1, 1)
- case (-16) ! No-slip wall bc at end
- call s_qbmm_extrapolation(pb, mv, 1, 1)
- end select
- end if
-
- ! END: Population of Buffers in x-direction ========================
-
- ! Population of Buffers in y-direction =============================
-
- if (n == 0) return
-
- select case (bc_y%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1)
- case (-14) ! Axis BC at beginning
- call s_axis(q_prim_vf, pb, mv, 2, -1)
- case (-2) ! Symmetry BC at beginning
- call s_symmetry(q_prim_vf, pb, mv, 2, -1)
- case (-1) ! Periodic BC at beginning
- call s_periodic(q_prim_vf, pb, mv, 2, -1)
- case (-15) ! Slip wall BC at beginning
- call s_slip_wall(q_prim_vf, pb, mv, 2, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1)
- case default ! Processor BC at beginning
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 2, -1)
- end select
-
- select case (bc_y%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1)
- case (-2) ! Symmetry BC at end
- call s_symmetry(q_prim_vf, pb, mv, 2, 1)
- case (-1) ! Periodic BC at end
- call s_periodic(q_prim_vf, pb, mv, 2, 1)
- case (-15) ! Slip wall BC at end
- call s_slip_wall(q_prim_vf, pb, mv, 2, 1)
- case (-16) ! No-slip wall BC at end
- call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1)
- case default ! Processor BC at end
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 2, 1)
- end select
-
- if (qbmm .and. .not. polytropic) then
-
- select case (bc_y%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_qbmm_extrapolation(pb, mv, 2, -1)
- case (-15) ! Slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 2, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 2, -1)
- end select
-
- select case (bc_y%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_qbmm_extrapolation(pb, mv, 2, 1)
- case (-15) ! Slip wall BC at end
- call s_qbmm_extrapolation(pb, mv, 2, 1)
- case (-16) ! No-slip wall BC at end
- call s_qbmm_extrapolation(pb, mv, 2, 1)
- end select
-
- end if
-
- ! END: Population of Buffers in y-direction ========================
-
- ! Population of Buffers in z-direction =============================
-
- if (p == 0) return
-
- select case (bc_z%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1)
- case (-2) ! Symmetry BC at beginning
- call s_symmetry(q_prim_vf, pb, mv, 3, -1)
- case (-1) ! Periodic BC at beginning
- call s_periodic(q_prim_vf, pb, mv, 3, -1)
- case (-15) ! Slip wall BC at beginning
- call s_slip_wall(q_prim_vf, pb, mv, 3, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1)
- case default ! Processor BC at beginning
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 3, -1)
- end select
-
- select case (bc_z%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1)
- case (-2) ! Symmetry BC at end
- call s_symmetry(q_prim_vf, pb, mv, 3, 1)
- case (-1) ! Periodic BC at end
- call s_periodic(q_prim_vf, pb, mv, 3, 1)
- case (-15) ! Slip wall BC at end
- call s_slip_wall(q_prim_vf, pb, mv, 3, 1)
- case (-16) ! No-slip wall BC at end
- call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1)
- case default ! Processor BC at end
- call s_mpi_sendrecv_variables_buffers( &
- q_prim_vf, pb, mv, 3, 1)
- end select
-
- if (qbmm .and. .not. polytropic) then
-
- select case (bc_z%beg)
- case (-13:-3) ! Ghost-cell extrap. BC at beginning
- call s_qbmm_extrapolation(pb, mv, 3, -1)
- case (-15) ! Slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 3, -1)
- case (-16) ! No-slip wall BC at beginning
- call s_qbmm_extrapolation(pb, mv, 3, -1)
- end select
-
- select case (bc_z%end)
- case (-13:-3) ! Ghost-cell extrap. BC at end
- call s_qbmm_extrapolation(pb, mv, 3, 1)
- case (-15) ! Slip wall BC at end
- call s_qbmm_extrapolation(pb, mv, 3, 1)
- case (-16) ! No-slip wall BC at end
- call s_qbmm_extrapolation(pb, mv, 3, 1)
- end select
-
- end if
-
- ! END: Population of Buffers in z-direction ========================
-
- end subroutine s_populate_variables_buffers
-
- subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then !< x-direction
-
- if (bc_loc == -1) then !bc_x%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(0, k, l)
- end do
- end do
- end do
- end do
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(m, k, l)
- end do
- end do
- end do
- end do
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then !< y-direction
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, 0, k)
- end do
- end do
- end do
- end do
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, n, k)
- end do
- end do
- end do
- end do
-
- end if
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then !< z-direction
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, 0)
- end do
- end do
- end do
- end do
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, p)
- end do
- end do
- end do
- end do
-
- end if
-
- end if
- !< =====================================================================
-
- end subroutine s_ghost_cell_extrapolation
-
- subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then
-
- if (bc_loc == -1) then !< bc_x%beg
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- !$acc loop seq
- do i = 1, contxe
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(j - 1, k, l)
- end do
-
- q_prim_vf(momxb)%sf(-j, k, l) = &
- -q_prim_vf(momxb)%sf(j - 1, k, l)
-
- !$acc loop seq
- do i = momxb + 1, sys_size
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(j - 1, k, l)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(-j, k, l, q, i) = &
- pb(j - 1, k, l, q, i)
- mv(-j, k, l, q, i) = &
- mv(j - 1, k, l, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(3) default(present)
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
-
- !$acc loop seq
- do i = 1, contxe
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(m - (j - 1), k, l)
- end do
-
- q_prim_vf(momxb)%sf(m + j, k, l) = &
- -q_prim_vf(momxb)%sf(m - (j - 1), k, l)
-
- !$acc loop seq
- do i = momxb + 1, sys_size
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(m - (j - 1), k, l)
- end do
-
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(m + j, k, l, q, i) = &
- pb(m - (j - 1), k, l, q, i)
- mv(m + j, k, l, q, i) = &
- mv(m - (j - 1), k, l, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- !$acc loop seq
- do i = 1, momxb
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k)
- end do
-
- q_prim_vf(momxb + 1)%sf(l, -j, k) = &
- -q_prim_vf(momxb + 1)%sf(l, j - 1, k)
-
- !$acc loop seq
- do i = momxb + 2, sys_size
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, -j, k, q, i) = &
- pb(l, j - 1, k, q, i)
- mv(l, -j, k, q, i) = &
- mv(l, j - 1, k, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- !$acc loop seq
- do i = 1, momxb
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, n - (j - 1), k)
- end do
-
- q_prim_vf(momxb + 1)%sf(l, n + j, k) = &
- -q_prim_vf(momxb + 1)%sf(l, n - (j - 1), k)
-
- !$acc loop seq
- do i = momxb + 2, sys_size
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, n - (j - 1), k)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, n + j, k, q, i) = &
- pb(l, n - (j - 1), k, q, i)
- mv(l, n + j, k, q, i) = &
- mv(l, n - (j - 1), k, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- !$acc loop seq
- do i = 1, momxb + 1
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, j - 1)
- end do
-
- q_prim_vf(momxe)%sf(k, l, -j) = &
- -q_prim_vf(momxe)%sf(k, l, j - 1)
-
- !$acc loop seq
- do i = E_idx, sys_size
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, j - 1)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, -j, q, i) = &
- pb(k, l, j - 1, q, i)
- mv(k, l, -j, q, i) = &
- mv(k, l, j - 1, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- !$acc loop seq
- do i = 1, momxb + 1
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, p - (j - 1))
- end do
-
- q_prim_vf(momxe)%sf(k, l, p + j) = &
- -q_prim_vf(momxe)%sf(k, l, p - (j - 1))
-
- !$acc loop seq
- do i = E_idx, sys_size
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, p - (j - 1))
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, p + j, q, i) = &
- pb(k, l, p - (j - 1), q, i)
- mv(k, l, p + j, q, i) = &
- mv(k, l, p - (j - 1), q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- end if
- !< =====================================================================
-
- end subroutine s_symmetry
-
- subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then
-
- if (bc_loc == -1) then !< bc_x%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(m - (j - 1), k, l)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(-j, k, l, q, i) = &
- pb(m - (j - 1), k, l, q, i)
- mv(-j, k, l, q, i) = &
- mv(m - (j - 1), k, l, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(j - 1, k, l)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(m + j, k, l, q, i) = &
- pb(j - 1, k, l, q, i)
- mv(m + j, k, l, q, i) = &
- mv(j - 1, k, l, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, n - (j - 1), k)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, -j, k, q, i) = &
- pb(l, n - (j - 1), k, q, i)
- mv(l, -j, k, q, i) = &
- mv(l, n - (j - 1), k, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, n + j, k, q, i) = &
- pb(l, (j - 1), k, q, i)
- mv(l, n + j, k, q, i) = &
- mv(l, (j - 1), k, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, p - (j - 1))
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, -j, q, i) = &
- pb(k, l, p - (j - 1), q, i)
- mv(k, l, -j, q, i) = &
- mv(k, l, p - (j - 1), q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, j - 1)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, p + j, q, i) = &
- pb(k, l, j - 1, q, i)
- mv(k, l, p + j, q, i) = &
- mv(k, l, j - 1, q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end if
-
- end if
- !< =====================================================================
-
- end subroutine s_periodic
-
- subroutine s_axis(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !$acc parallel loop collapse(3) gang vector default(present)
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (z_cc(k) < pi) then
- !$acc loop seq
- do i = 1, momxb
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k + ((p + 1)/2))
- end do
+ subroutine s_populate_capillary_buffers(c_divs)
- q_prim_vf(momxb + 1)%sf(l, -j, k) = &
- -q_prim_vf(momxb + 1)%sf(l, j - 1, k + ((p + 1)/2))
+ type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
- q_prim_vf(momxe)%sf(l, -j, k) = &
- -q_prim_vf(momxe)%sf(l, j - 1, k + ((p + 1)/2))
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
- !$acc loop seq
- do i = E_idx, sys_size
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k + ((p + 1)/2))
- end do
+ #:for dir, loc in itertools.product([1, 2, 3], [-1, 1])
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc=loc, outer_loops=[("i", 1, "num_dims + 1")])
+ select case (bc_id_sfs(${dir}$, ${loc}$)%sf(exlhs, eylhs, ezlhs)%type)
+ case (-13:-3);
+ c_divs(i)%sf(x, y, z) = c_divs(i)%sf(ex, ey, ez)
+ case (-2); !< slip wall or reflective
+ if (i == 1) then
+ c_divs(i)%sf(x, y, z) = -c_divs(i)%sf(sx, sy, sz)
else
- !$acc loop seq
- do i = 1, momxb
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k - ((p + 1)/2))
- end do
-
- q_prim_vf(momxb + 1)%sf(l, -j, k) = &
- -q_prim_vf(momxb + 1)%sf(l, j - 1, k - ((p + 1)/2))
-
- q_prim_vf(momxe)%sf(l, -j, k) = &
- -q_prim_vf(momxe)%sf(l, j - 1, k - ((p + 1)/2))
-
- !$acc loop seq
- do i = E_idx, sys_size
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, j - 1, k - ((p + 1)/2))
- end do
+ c_divs(i)%sf(x, y, z) = +c_divs(i)%sf(sx, sy, sz)
end if
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, -j, k, q, i) = &
- pb(l, j - 1, k - ((p + 1)/2), q, i)
- mv(l, -j, k, q, i) = &
- mv(l, j - 1, k - ((p + 1)/2), q, i)
- end do
- end do
- end do
- end do
- end do
- end if
-
- end subroutine s_axis
-
- subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then
-
- if (bc_loc == -1) then !< bc_x%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(-j, k, l) = &
- -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb1
- else
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(0, k, l)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(m + j, k, l) = &
- -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve1
- else
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(m, k, l)
- end if
- end do
- end do
- end do
- end do
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == momxb + 1) then
- q_prim_vf(i)%sf(l, -j, k) = &
- -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb2
- else
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, 0, k)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == momxb + 1) then
- q_prim_vf(i)%sf(l, n + j, k) = &
- -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve2
- else
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, n, k)
- end if
- end do
- end do
- end do
- end do
+ case (-1);
+ c_divs(i)%sf(x, y, z) = c_divs(i)%sf(px, py, pz)
+ end select
+ #:endblock
+ if (${dir}$ <= num_dims) then
+ call s_mpi_sendrecv_capilary_variables_buffers(c_divs, bc_id_sfs, ${dir}$, ${loc}$)
end if
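
The select case introduced above replaces the direction-by-direction buffer loops removed below: for every ghost cell it copies from an extrapolation source, a mirrored source, or a periodic source, depending on the boundary type stored in bc_id_sfs. A self-contained 1D sketch of those three index mappings for a beg-side buffer, with made-up sizes, follows; it is illustrative only and not part of the patch (the capillary routine additionally negates the first component in the reflective case).

! Illustrative sketch, not part of the patch: source-cell mappings for a ghost
! cell at index -j on the beg side of a field with interior cells 0..m.
program ghost_cell_sketch
    implicit none
    integer, parameter :: m = 7, buff_size = 2
    integer, parameter :: bc_type = -1   ! -13:-3 extrapolation, -2 reflective, -1 periodic
    real(kind(0d0)) :: f(-buff_size:m + buff_size)
    integer :: j

    f = 0d0
    f(0:m) = [(real(j, kind(0d0)), j=0, m)]   ! arbitrary interior data

    do j = 1, buff_size
        select case (bc_type)
        case (-13:-3)
            f(-j) = f(0)             ! ghost-cell extrapolation
        case (-2)
            f(-j) = f(j - 1)         ! mirror about the domain edge
        case (-1)
            f(-j) = f(m - (j - 1))   ! periodic wrap-around
        end select
    end do

    print *, 'ghost cells:', f(-buff_size:-1)
end program ghost_cell_sketch
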
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == momxe) then
- q_prim_vf(i)%sf(k, l, -j) = &
- -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb3
- else
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, 0)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == momxe) then
- q_prim_vf(i)%sf(k, l, p + j) = &
- -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve3
- else
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, p)
- end if
- end do
- end do
- end do
- end do
-
- end if
-
- end if
- !< =====================================================================
-
- end subroutine s_slip_wall
-
- subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then
-
- if (bc_loc == -1) then !< bc_x%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(-j, k, l) = &
- -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(-j, k, l) = &
- -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(-j, k, l) = &
- -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb3
- else
- q_prim_vf(i)%sf(-j, k, l) = &
- q_prim_vf(i)%sf(0, k, l)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(m + j, k, l) = &
- -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(m + j, k, l) = &
- -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(m + j, k, l) = &
- -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve3
- else
- q_prim_vf(i)%sf(m + j, k, l) = &
- q_prim_vf(i)%sf(m, k, l)
- end if
- end do
- end do
- end do
- end do
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(l, -j, k) = &
- -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(l, -j, k) = &
- -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(l, -j, k) = &
- -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb3
- else
- q_prim_vf(i)%sf(l, -j, k) = &
- q_prim_vf(i)%sf(l, 0, k)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(l, n + j, k) = &
- -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(l, n + j, k) = &
- -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(l, n + j, k) = &
- -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve3
- else
- q_prim_vf(i)%sf(l, n + j, k) = &
- q_prim_vf(i)%sf(l, n, k)
- end if
- end do
- end do
- end do
- end do
-
- end if
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(k, l, -j) = &
- -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(k, l, -j) = &
- -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(k, l, -j) = &
- -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb3
- else
- q_prim_vf(i)%sf(k, l, -j) = &
- q_prim_vf(i)%sf(k, l, 0)
- end if
- end do
- end do
- end do
- end do
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, sys_size
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == momxb) then
- q_prim_vf(i)%sf(k, l, p + j) = &
- -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve1
- elseif (i == momxb + 1 .and. num_dims > 1) then
- q_prim_vf(i)%sf(k, l, p + j) = &
- -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve2
- elseif (i == momxb + 2 .and. num_dims > 2) then
- q_prim_vf(i)%sf(k, l, p + j) = &
- -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve3
- else
- q_prim_vf(i)%sf(k, l, p + j) = &
- q_prim_vf(i)%sf(k, l, p)
- end if
- end do
- end do
- end do
- end do
-
- end if
-
- end if
- !< =====================================================================
-
- end subroutine s_no_slip_wall
-
- subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc)
-
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
- integer, intent(in) :: bc_dir, bc_loc
-
- integer :: j, k, l, q, i
-
- !< x-direction =========================================================
- if (bc_dir == 1) then
-
- if (bc_loc == -1) then !< bc_x%beg
-
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(-j, k, l, q, i) = &
- pb(0, k, l, q, i)
- mv(-j, k, l, q, i) = &
- mv(0, k, l, q, i)
- end do
- end do
- end do
- end do
- end do
-
- else !< bc_x%end
-
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- pb(m + j, k, l, q, i) = &
- pb(m, k, l, q, i)
- mv(m + j, k, l, q, i) = &
- mv(m, k, l, q, i)
- end do
- end do
- end do
- end do
- end do
-
- end if
-
- !< y-direction =========================================================
- elseif (bc_dir == 2) then
-
- if (bc_loc == -1) then !< bc_y%beg
-
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, -j, k, q, i) = &
- pb(l, 0, k, q, i)
- mv(l, -j, k, q, i) = &
- mv(l, 0, k, q, i)
- end do
- end do
- end do
- end do
- end do
-
- else !< bc_y%end
-
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- pb(l, n + j, k, q, i) = &
- pb(l, n, k, q, i)
- mv(l, n + j, k, q, i) = &
- mv(l, n, k, q, i)
- end do
- end do
- end do
- end do
- end do
-
- end if
-
- !< z-direction =========================================================
- elseif (bc_dir == 3) then
-
- if (bc_loc == -1) then !< bc_z%beg
-
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, -j, q, i) = &
- pb(k, l, 0, q, i)
- mv(k, l, -j, q, i) = &
- mv(k, l, 0, q, i)
- end do
- end do
- end do
- end do
- end do
-
- else !< bc_z%end
-
- !$acc parallel loop collapse(5) gang vector default(present)
- do i = 1, nb
- do q = 1, nnode
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- pb(k, l, p + j, q, i) = &
- pb(k, l, p, q, i)
- mv(k, l, p + j, q, i) = &
- mv(k, l, p, q, i)
- end do
- end do
- end do
- end do
- end do
-
- end if
-
- end if
-
- end subroutine
-
- subroutine s_populate_capillary_buffers(c_divs)
-
- type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
- integer :: i, j, k, l
-
- ! x - direction
- if (bc_x%beg <= -3) then !< ghost cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- c_divs(i)%sf(-j, k, l) = &
- c_divs(i)%sf(0, k, l)
- end do
- end do
- end do
- end do
- elseif (bc_x%beg == -2) then !< slip wall or reflective
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == 1) then
- c_divs(i)%sf(-j, k, l) = &
- -c_divs(i)%sf(j - 1, k, l)
- else
- c_divs(i)%sf(-j, k, l) = &
- c_divs(i)%sf(j - 1, k, l)
- end if
- end do
- end do
- end do
- end do
- elseif (bc_x%beg == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- c_divs(i)%sf(-j, k, l) = &
- c_divs(i)%sf(m - (j - 1), k, l)
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1)
- end if
-
- if (bc_x%end <= -3) then !< ghost-cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- c_divs(i)%sf(m + j, k, l) = &
- c_divs(i)%sf(m, k, l)
- end do
- end do
- end do
- end do
- elseif (bc_x%end == -2) then
- !$acc parallel loop collapse(4) default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- if (i == 1) then
- c_divs(i)%sf(m + j, k, l) = &
- -c_divs(i)%sf(m - (j - 1), k, l)
- else
- c_divs(i)%sf(m + j, k, l) = &
- c_divs(i)%sf(m - (j - 1), k, l)
- end if
- end do
- end do
- end do
- end do
- else if (bc_x%end == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do l = 0, p
- do k = 0, n
- do j = 1, buff_size
- c_divs(i)%sf(m + j, k, l) = &
- c_divs(i)%sf(j - 1, k, l)
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1)
- end if
-
- if (n == 0) then
- return
- elseif (bc_y%beg <= -3) then !< ghost-cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- c_divs(i)%sf(l, -j, k) = &
- c_divs(i)%sf(l, 0, k)
- end do
- end do
- end do
- end do
- elseif (bc_y%beg == -2) then !< slip wall or reflective
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == 2) then
- c_divs(i)%sf(l, -j, k) = &
- -c_divs(i)%sf(l, j - 1, k)
- else
- c_divs(i)%sf(l, -j, k) = &
- c_divs(i)%sf(l, j - 1, k)
- end if
- end do
- end do
- end do
- end do
- elseif (bc_y%beg == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- c_divs(i)%sf(l, -j, k) = &
- c_divs(i)%sf(l, n - (j - 1), k)
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1)
- end if
-
- if (bc_y%end <= -3) then !< ghost-cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- c_divs(i)%sf(l, n + j, k) = &
- c_divs(i)%sf(l, n, k)
- end do
- end do
- end do
- end do
- elseif (bc_y%end == -2) then !< slip wall or reflective
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- if (i == 2) then
- c_divs(i)%sf(l, n + j, k) = &
- -c_divs(i)%sf(l, n - (j - 1), k)
- else
- c_divs(i)%sf(l, n + j, k) = &
- c_divs(i)%sf(l, n - (j - 1), k)
- end if
- end do
- end do
- end do
- end do
- elseif (bc_y%end == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do k = 0, p
- do j = 1, buff_size
- do l = -buff_size, m + buff_size
- c_divs(i)%sf(l, n + j, k) = &
- c_divs(i)%sf(l, j - 1, k)
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1)
- end if
-
- if (p == 0) then
- return
- elseif (bc_z%beg <= -3) then !< ghost-cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- c_divs(i)%sf(k, l, -j) = &
- c_divs(i)%sf(k, l, 0)
- end do
- end do
- end do
- end do
- elseif (bc_z%beg == -2) then !< symmetry
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == 3) then
- c_divs(i)%sf(k, l, -j) = &
- -c_divs(i)%sf(k, l, j - 1)
- else
- c_divs(i)%sf(k, l, -j) = &
- c_divs(i)%sf(k, l, j - 1)
- end if
- end do
- end do
- end do
- end do
- elseif (bc_z%beg == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- c_divs(i)%sf(k, l, -j) = &
- c_divs(i)%sf(k, l, p - (j - 1))
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1)
- end if
-
- if (bc_z%end <= -3) then !< ghost-cell extrapolation
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- c_divs(i)%sf(k, l, p + j) = &
- c_divs(i)%sf(k, l, p)
- end do
- end do
- end do
- end do
- elseif (bc_z%end == -2) then !< symmetry
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- if (i == 3) then
- c_divs(i)%sf(k, l, p + j) = &
- -c_divs(i)%sf(k, l, p - (j - 1))
- else
- c_divs(i)%sf(k, l, p + j) = &
- c_divs(i)%sf(k, l, p - (j - 1))
- end if
- end do
- end do
- end do
- end do
- elseif (bc_z%end == -1) then
- !$acc parallel loop collapse(4) gang vector default(present)
- do i = 1, num_dims + 1
- do j = 1, buff_size
- do l = -buff_size, n + buff_size
- do k = -buff_size, m + buff_size
- c_divs(i)%sf(k, l, p + j) = &
- c_divs(i)%sf(k, l, j - 1)
- end do
- end do
- end do
- end do
- else
- call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1)
- end if
+ #:endfor
end subroutine s_populate_capillary_buffers
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
index db5d17656e..fef58ba7b3 100644
--- a/src/simulation/m_global_parameters.fpp
+++ b/src/simulation/m_global_parameters.fpp
@@ -22,6 +22,8 @@ module m_global_parameters
use m_helper_basic !< Functions to compare floating point numbers
+ use m_global_parameters_common
+
#ifdef MFC_OpenACC
use openacc
#endif
@@ -184,6 +186,8 @@ module m_global_parameters
!> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively
!> @{
type(int_bounds_info) :: bc_x, bc_y, bc_z
+ integer :: num_bc_patches
+ type(bc_patch_parameters) :: patch_bc(num_bc_patches_max)
!> @}
type(bounds_info) :: x_domain, y_domain, z_domain
@@ -191,9 +195,12 @@ module m_global_parameters
logical :: file_per_process !< shared file or not when using parallel io
integer :: precision !< Precision of output files
- integer, allocatable, dimension(:) :: proc_coords !<
+ integer, dimension(1:3) :: proc_coords !<
!! Processor coordinates in MPI_CART_COMM
+ integer, dimension(1:3) :: proc_nums !<
+ !! Processor dimensions in MPI_CART_COMM
+
integer, allocatable, dimension(:) :: start_idx !<
!! Starting cell-center index of local processor in global grid
@@ -295,9 +302,9 @@ module m_global_parameters
!! in the flow. These include the stiffened gas equation of state parameters,
!! the Reynolds numbers and the Weber numbers.
- !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3)
- !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3)
- !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3)
+ !$acc declare create(bc_x%vel_beg, bc_x%vel_end)
+ !$acc declare create(bc_y%vel_beg, bc_y%vel_end)
+ !$acc declare create(bc_z%vel_beg, bc_z%vel_end)
! ==========================================================================
@@ -535,16 +542,16 @@ contains
bc_z%beg = dflt_int; bc_z%end = dflt_int
#:for DIM in ['x', 'y', 'z']
- #:for DIR in [1, 2, 3]
- bc_${DIM}$%vb${DIR}$ = 0d0
- bc_${DIM}$%ve${DIR}$ = 0d0
- #:endfor
+ bc_${DIM}$%vel_beg = 0d0
+ bc_${DIM}$%vel_end = 0d0
#:endfor
x_domain%beg = dflt_int; x_domain%end = dflt_int
y_domain%beg = dflt_int; y_domain%end = dflt_int
z_domain%beg = dflt_int; z_domain%end = dflt_int
+ call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc)
+
! Fluids physical parameters
do i = 1, num_fluids_max
fluid_pp(i)%gamma = dflt_real
@@ -1130,8 +1137,6 @@ contains
num_dims = 1 + min(1, n) + min(1, p)
#:endif
- allocate (proc_coords(1:num_dims))
-
if (parallel_io .neqv. .true.) return
#ifdef MFC_MPI
@@ -1166,7 +1171,6 @@ contains
@:DEALLOCATE(Re_idx)
end if
- deallocate (proc_coords)
if (parallel_io) then
deallocate (start_idx)
do i = 1, sys_size
diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp
index 8421fda52e..a8b1043f9f 100644
--- a/src/simulation/m_ibm.fpp
+++ b/src/simulation/m_ibm.fpp
@@ -19,6 +19,10 @@ module m_ibm
use m_helper
+ use m_constants
+
+ use m_boundary_conditions_common
+
! ==========================================================================
implicit none
@@ -42,7 +46,6 @@ module m_ibm
type(ghost_point), dimension(:), allocatable :: inner_points
!$acc declare create(ghost_points, inner_points)
- integer :: gp_layers !< Number of ghost point layers
integer :: num_gps !< Number of ghost points
integer :: num_inner_gps !< Number of ghost points
!$acc declare create(gp_layers, num_gps, num_inner_gps)
@@ -52,8 +55,6 @@ contains
!> Allocates memory for the variables in the IBM module
subroutine s_initialize_ibm_module()
- gp_layers = 3
-
if (p > 0) then
@:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, &
-gp_layers:n+gp_layers, -gp_layers:p+gp_layers))
@@ -74,7 +75,7 @@ contains
@:ACC_SETUP_SFs(levelset)
! @:ALLOCATE(ib_markers%sf(0:m, 0:n, 0:p))
- !$acc enter data copyin(gp_layers, num_gps, num_inner_gps)
+ !$acc enter data copyin(num_gps, num_inner_gps)
end subroutine s_initialize_ibm_module
@@ -87,7 +88,7 @@ contains
!$acc update device(ib_markers%sf)
! Get neighboring IB variables from other processors
- call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers)
+ call s_mpi_sendrecv_ib_buffers(ib_markers, bc_id_sfs)
!$acc update host(ib_markers%sf)
@@ -212,7 +213,7 @@ contains
! Calculate velocity of ghost cell
if (gp%slip) then
- norm = gp%ip_loc - physical_loc !
+ norm = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, :)
buf = sqrt(sum(norm**2))
norm = norm/buf
vel_norm_IP = sum(vel_IP*norm)*norm
@@ -447,6 +448,7 @@ contains
integer :: i, j, k, l, q !< Iterator variables
num_gps = 0
+ num_inner_gps = 0
do i = 0, m
do j = 0, n
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
index 8b1ea7749e..94dbb1364b 100644
--- a/src/simulation/m_mpi_proxy.fpp
+++ b/src/simulation/m_mpi_proxy.fpp
@@ -30,19 +30,12 @@ module m_mpi_proxy
use m_nvtx
use ieee_arithmetic
- ! ==========================================================================
- implicit none
+ use m_constants
- real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_send !<
- !! This variable is utilized to pack and send the buffer of the cell-average
- !! conservative variables, for a single computational domain boundary at the
- !! time, to the relevant neighboring processor.
+ ! ==========================================================================
- real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_recv !<
- !! q_cons_buff_recv is utilized to receive and unpack the buffer of the cell-
- !! average conservative variables, for a single computational domain boundary
- !! at the time, from the relevant neighboring processor.
+ implicit none
real(kind(0d0)), private, allocatable, dimension(:), target :: c_divs_buff_send !<
!! c_divs_buff_send is utilized to send and unpack the buffer of the cell-
@@ -64,15 +57,13 @@ module m_mpi_proxy
!! immersed boundary markers, for a single computational domain boundary
!! at the time, from the relevant neighboring processor.
- !$acc declare create(q_cons_buff_send, q_cons_buff_recv)
- !$acc declare create( ib_buff_send, ib_buff_recv)
+ !$acc declare create(ib_buff_send, ib_buff_recv)
!$acc declare create(c_divs_buff_send, c_divs_buff_recv)
!> @name Generic flags used to identify and report MPI errors
!> @{
- integer, private :: err_code, ierr, v_size
+ integer, private :: err_code, ierr
!> @}
- !$acc declare create(v_size)
!real :: s_time, e_time
!real :: compress_time, mpi_time, decompress_time
@@ -90,70 +81,45 @@ contains
#ifdef MFC_MPI
- ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that
- ! for the sake of simplicity, both variables are provided sufficient
- ! storage to hold the largest buffer in the computational domain.
-
- if (qbmm .and. .not. polytropic) then
- if (n > 0) then
- if (p > 0) then
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
- & (m + 2*buff_size + 1)* &
- & (n + 2*buff_size + 1)* &
- & (p + 2*buff_size + 1)/ &
- & (min(m, n, p) + 2*buff_size + 1)))
- else
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* &
- & (max(m, n) + 2*buff_size + 1)))
- end if
- else
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)))
- end if
-
- @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
-
- v_size = sys_size + 2*nb*4
- else
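+ ! Note (added for clarity): as with the original allocations removed above, the
+ ! send/recv buffers below are given enough storage to hold the largest
+ ! single-boundary buffer in the computational domain.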
+ if (surface_tension) then
+ nVars = num_dims + 1
if (n > 0) then
if (p > 0) then
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+ @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* &
& (m + 2*buff_size + 1)* &
& (n + 2*buff_size + 1)* &
& (p + 2*buff_size + 1)/ &
& (min(m, n, p) + 2*buff_size + 1)))
else
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* &
+ @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* &
& (max(m, n) + 2*buff_size + 1)))
end if
else
- @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size))
+ @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)))
end if
- @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1)))
-
- v_size = sys_size
+ @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1)))
end if
+ !$acc update device(nVars)
- if (surface_tension) then
- nVars = num_dims + 1
+ if (ib) then
if (n > 0) then
if (p > 0) then
- @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* &
- & (m + 2*buff_size + 1)* &
- & (n + 2*buff_size + 1)* &
- & (p + 2*buff_size + 1)/ &
- & (min(m, n, p) + 2*buff_size + 1)))
+ @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * &
+ & (m + 2*gp_layers + 1)* &
+ & (n + 2*gp_layers + 1)* &
+ & (p + 2*gp_layers + 1)/ &
+ & (min(m, n, p) + 2*gp_layers + 1)))
else
- @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* &
- & (max(m, n) + 2*buff_size + 1)))
+ @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* &
+ & (max(m, n) + 2*gp_layers + 1)))
end if
else
- @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)))
+ @:ALLOCATE(ib_buff_send(0:-1 + gp_layers))
end if
- @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1)))
+ @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1)))
end if
- !$acc update device(v_size, nVars)
#endif
@@ -214,9 +180,6 @@ contains
#:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', &
& 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', &
- & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', &
- & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', &
- & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', &
& 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 'bc_z%pres_in','bc_z%pres_out', &
& 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', &
& 'z_domain%beg', 'z_domain%end', 't_stop', 't_save', 'cfl_target']
@@ -294,812 +257,146 @@ contains
end subroutine s_mpi_bcast_user_inputs
- !> The purpose of this procedure is to optimally decompose
- !! the computational domain among the available processors.
- !! This is performed by attempting to award each processor,
- !! in each of the coordinate directions, approximately the
- !! same number of cells, and then recomputing the affected
- !! global parameters.
- subroutine s_mpi_decompose_computational_domain
-
-#ifdef MFC_MPI
-
- integer :: num_procs_x, num_procs_y, num_procs_z !<
- !! Optimal number of processors in the x-, y- and z-directions
-
- real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !<
- !! Non-optimal number of processors in the x-, y- and z-directions
-
- real(kind(0d0)) :: fct_min !<
- !! Processor factorization (fct) minimization parameter
-
- integer :: MPI_COMM_CART !<
- !! Cartesian processor topology communicator
-
- integer :: rem_cells !<
- !! Remaining number of cells, in a particular coordinate direction,
- !! after the majority is divided up among the available processors
-
- integer :: i, j !< Generic loop iterators
-
- if (num_procs == 1 .and. parallel_io) then
- do i = 1, num_dims
- start_idx(i) = 0
- end do
- return
- end if
-
- ! 3D Cartesian Processor Topology ==================================
- if (n > 0) then
-
- if (p > 0) then
-
- if (cyl_coord .and. p > 0) then
- ! Implement pencil processor blocking if using cylindrical coordinates so
- ! that all cells in azimuthal direction are stored on a single processor.
- ! This is necessary for efficient application of Fourier filter near axis.
-
- ! Initial values of the processor factorization optimization
- num_procs_x = 1
- num_procs_y = num_procs
- num_procs_z = 1
- ierr = -1
-
- ! Computing minimization variable for these initial values
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Searching for optimal computational domain distribution
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
-
- end if
-
- end do
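+ !> Exchanges the buffers of the immersed-boundary markers with the
+ !!  neighboring processors in each active coordinate direction.
+ !!  @param ib_markers Immersed-boundary marker field
+ !!  @param bc_id_sfs Boundary-condition id fields for each direction and side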
+ subroutine s_mpi_sendrecv_ib_buffers(ib_markers, bc_id_sfs)
- else
-
- ! Initial estimate of optimal processor topology
- num_procs_x = 1
- num_procs_y = 1
- num_procs_z = num_procs
- ierr = -1
-
- ! Benchmarking the quality of this initial guess
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- tmp_num_procs_z = num_procs_z
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + 10d0*abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
-
- ! Optimization of the initial processor topology
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- do j = 1, num_procs/i
-
- if (mod(num_procs/i, j) == 0 &
- .and. &
- (n + 1)/j >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = j
- tmp_num_procs_z = num_procs/(i*j)
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z) &
- .and. &
- (p + 1)/tmp_num_procs_z &
- >= &
- num_stcls_min*weno_order) &
- then
-
- num_procs_x = i
- num_procs_y = j
- num_procs_z = num_procs/(i*j)
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- + abs((n + 1)/tmp_num_procs_y &
- - (p + 1)/tmp_num_procs_z)
- ierr = 0
+ type(integer_field), intent(inout) :: ib_markers
+ type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs
- end if
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
- end if
+ integer :: buffer_counts(1:3)
+ integer :: iter_dir, iter_loc
+ integer, pointer :: p_send, p_recv
- end do
+#ifdef MFC_MPI
- end if
+ buffer_counts = (/ &
+ gp_layers*(n + 1)*(p + 1), &
+ gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
+ gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) &
+ /)
- end do
+ do iter_dir = 1, num_dims
- end if
+ do iter_loc = -1, 1, 2
- ! Verifying that a valid decomposition of the computational
- ! domain has been established. If not, the simulation exits.
- if (proc_rank == 0 .and. ierr == -1) then
- call s_mpi_abort('Unsupported combination of values '// &
- 'of num_procs, m, n, p and '// &
- 'weno_order. Exiting ...')
- end if
+ call nvtxStartRange("RHS-COMM-PACKBUF")
- ! Creating new communicator using the Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, &
- num_procs_y, num_procs_z/), &
- (/.true., .true., .true./), &
- .false., MPI_COMM_CART, ierr)
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="-iter_loc", pack_v_size='1', thickness='gp_layers', inner_loops=[("i", 1, 1)])
+ ib_buff_send(pack_idr) = ib_markers%sf(sx, sy, sz)
+ #:endblock
- ! Finding the Cartesian coordinates of the local process
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, &
- proc_coords, ierr)
- ! END: 3D Cartesian Processor Topology =============================
+ call nvtxEndRange ! Packbuf
- ! Global Parameters for z-direction ================================
+ p_send => ib_buff_send(0)
+ p_recv => ib_buff_recv(0)
+ #:for rdma_mpi in [False, True]
+ if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
+ #:if rdma_mpi
+ !$acc data attach(p_send, p_recv)
+ !$acc host_data use_device(p_send, p_recv)
- ! Number of remaining cells
- rem_cells = mod(p + 1, num_procs_z)
+ call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
+ #:else
+ call nvtxStartRange("RHS-COMM-DEV2HOST")
+ !$acc update host(ib_buff_send)
+ call nvtxEndRange ! Dev2Host
- ! Optimal number of cells per processor
- p = (p + 1)/num_procs_z - 1
+ call nvtxStartRange("RHS-COMM-SENDRECV-NO-RDMA")
+ #:endif
- ! Distributing the remaining cells
- do i = 1, rem_cells
- if (proc_coords(3) == i - 1) then
- p = p + 1; exit
- end if
- end do
-
- ! Boundary condition at the beginning
- if (proc_coords(3) > 0 .or. bc_z%beg == -1) then
- proc_coords(3) = proc_coords(3) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_z%beg, ierr)
- proc_coords(3) = proc_coords(3) + 1
- end if
+ call MPI_SENDRECV( &
+ p_send, buffer_counts(iter_dir), MPI_INTEGER, neighbor_procs(iter_dir, -iter_loc), 0, &
+ p_recv, buffer_counts(iter_dir), MPI_INTEGER, neighbor_procs(iter_dir, +iter_loc), 0, &
+ MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
- ! Boundary condition at the end
- if (proc_coords(3) < num_procs_z - 1 .or. bc_z%end == -1) then
- proc_coords(3) = proc_coords(3) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_z%end, ierr)
- proc_coords(3) = proc_coords(3) - 1
- end if
+ call nvtxEndRange ! RHS-COMM-SENDRECV-(NO)-RDMA
- if (parallel_io) then
- if (proc_coords(3) < rem_cells) then
- start_idx(3) = (p + 1)*proc_coords(3)
- else
- start_idx(3) = (p + 1)*proc_coords(3) + rem_cells
+ #:if rdma_mpi
+ !$acc end host_data
+ !$acc end data
+ !$acc wait
+ #:else
+ call nvtxStartRange("RHS-COMM-HOST2DEV")
+ !$acc update device(ib_buff_recv)
+ call nvtxEndRange ! Host2Dev
+ #:endif
end if
- end if
- ! ==================================================================
-
- ! 2D Cartesian Processor Topology ==================================
- else
+ #:endfor
- ! Initial estimate of optimal processor topology
- num_procs_x = 1
- num_procs_y = num_procs
- ierr = -1
-
- ! Benchmarking the quality of this initial guess
- tmp_num_procs_x = num_procs_x
- tmp_num_procs_y = num_procs_y
- fct_min = 10d0*abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
-
- ! Optimization of the initial processor topology
- do i = 1, num_procs
-
- if (mod(num_procs, i) == 0 &
- .and. &
- (m + 1)/i >= num_stcls_min*weno_order) then
-
- tmp_num_procs_x = i
- tmp_num_procs_y = num_procs/i
-
- if (fct_min >= abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y) &
- .and. &
- (n + 1)/tmp_num_procs_y &
- >= &
- num_stcls_min*weno_order) then
-
- num_procs_x = i
- num_procs_y = num_procs/i
- fct_min = abs((m + 1)/tmp_num_procs_x &
- - (n + 1)/tmp_num_procs_y)
- ierr = 0
-
- end if
+ call nvtxStartRange("RHS-COMM-UNPACKBUF")
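+ ! Unpack the received IB markers into the buffer region, but only
+ ! where the stored boundary-condition type is non-negative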
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", pack_v_size='1', thickness='gp_layers', inner_loops=[("i", 1, 1)])
+ if (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then
+ ib_markers%sf(x, y, z) = ib_buff_recv(pack_idr)
end if
+ #:endblock
- end do
-
- ! Verifying that a valid decomposition of the computational
- ! domain has been established. If not, the simulation exits.
- if (proc_rank == 0 .and. ierr == -1) then
- call s_mpi_abort('Unsupported combination of values '// &
- 'of num_procs, m, n and '// &
- 'weno_order. Exiting ...')
- end if
-
- ! Creating new communicator using the Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, &
- num_procs_y/), (/.true., &
- .true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding the Cartesian coordinates of the local process
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, &
- proc_coords, ierr)
-
- end if
- ! END: 2D Cartesian Processor Topology =============================
-
- ! Global Parameters for y-direction ================================
-
- ! Number of remaining cells
- rem_cells = mod(n + 1, num_procs_y)
-
- ! Optimal number of cells per processor
- n = (n + 1)/num_procs_y - 1
+ call nvtxEndRange ! Unpackbuf
- ! Distributing the remaining cells
- do i = 1, rem_cells
- if (proc_coords(2) == i - 1) then
- n = n + 1; exit
- end if
end do
- ! Boundary condition at the beginning
- if (proc_coords(2) > 0 .or. bc_y%beg == -1) then
- proc_coords(2) = proc_coords(2) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_y%beg, ierr)
- proc_coords(2) = proc_coords(2) + 1
- end if
-
- ! Boundary condition at the end
- if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == -1) then
- proc_coords(2) = proc_coords(2) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, &
- bc_y%end, ierr)
- proc_coords(2) = proc_coords(2) - 1
- end if
-
- if (parallel_io) then
- if (proc_coords(2) < rem_cells) then
- start_idx(2) = (n + 1)*proc_coords(2)
- else
- start_idx(2) = (n + 1)*proc_coords(2) + rem_cells
- end if
- end if
-
- ! ==================================================================
-
- ! 1D Cartesian Processor Topology ==================================
- else
-
- ! Optimal processor topology
- num_procs_x = num_procs
-
- ! Creating new communicator using the Cartesian topology
- call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), &
- (/.true./), .false., MPI_COMM_CART, &
- ierr)
-
- ! Finding the Cartesian coordinates of the local process
- call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, &
- proc_coords, ierr)
-
- end if
- ! ==================================================================
-
- ! Global Parameters for x-direction ================================
-
- ! Number of remaining cells
- rem_cells = mod(m + 1, num_procs_x)
-
- ! Optimal number of cells per processor
- m = (m + 1)/num_procs_x - 1
-
- ! Distributing the remaining cells
- do i = 1, rem_cells
- if (proc_coords(1) == i - 1) then
- m = m + 1; exit
- end if
end do
- ! Boundary condition at the beginning
- if (proc_coords(1) > 0 .or. bc_x%beg == -1) then
- proc_coords(1) = proc_coords(1) - 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr)
- proc_coords(1) = proc_coords(1) + 1
- end if
-
- ! Boundary condition at the end
- if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then
- proc_coords(1) = proc_coords(1) + 1
- call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr)
- proc_coords(1) = proc_coords(1) - 1
- end if
-
- if (parallel_io) then
- if (proc_coords(1) < rem_cells) then
- start_idx(1) = (m + 1)*proc_coords(1)
- else
- start_idx(1) = (m + 1)*proc_coords(1) + rem_cells
- end if
- end if
- ! ==================================================================
-
#endif
- end subroutine s_mpi_decompose_computational_domain
-
- !> The goal of this procedure is to populate the buffers of
- !! the grid variables by communicating with the neighboring
- !! processors. Note that only the buffers of the cell-width
- !! distributions are handled in such a way. This is because
- !! the buffers of cell-boundary locations may be calculated
- !! directly from those of the cell-width distributions.
- !! @param mpi_dir MPI communication coordinate direction
- !! @param pbc_loc Processor boundary condition (PBC) location
- subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc)
-
- integer, intent(in) :: mpi_dir
- integer, intent(in) :: pbc_loc
-
- integer :: dst_proc(1:3)
-
-#ifdef MFC_MPI
-
- ! MPI Communication in x-direction =================================
- if (mpi_dir == 1) then
-
- if (pbc_loc == -1) then ! PBC at the beginning
-
- if (bc_x%end >= 0) then ! PBC at the beginning and end
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- dx(m - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- dx(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the beginning only
-
- ! Send/receive buffer to/from bc_x%beg/bc_x%beg
- call MPI_SENDRECV( &
- dx(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- dx(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- else ! PBC at the end
-
- if (bc_x%beg >= 0) then ! PBC at the end and beginning
-
- ! Send/receive buffer to/from bc_x%beg/bc_x%end
- call MPI_SENDRECV( &
- dx(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%beg, 1, &
- dx(m + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the end only
-
- ! Send/receive buffer to/from bc_x%end/bc_x%end
- call MPI_SENDRECV( &
- dx(m - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 0, &
- dx(m + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- end if
- ! END: MPI Communication in x-direction ============================
-
- ! MPI Communication in y-direction =================================
- elseif (mpi_dir == 2) then
-
- if (pbc_loc == -1) then ! PBC at the beginning
-
- if (bc_y%end >= 0) then ! PBC at the beginning and end
-
- ! Send/receive buffer to/from bc_y%end/bc_y%beg
- call MPI_SENDRECV( &
- dy(n - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 0, &
- dy(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the beginning only
-
- ! Send/receive buffer to/from bc_y%beg/bc_y%beg
- call MPI_SENDRECV( &
- dy(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 1, &
- dy(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- else ! PBC at the end
-
- if (bc_y%beg >= 0) then ! PBC at the end and beginning
-
- ! Send/receive buffer to/from bc_y%beg/bc_y%end
- call MPI_SENDRECV( &
- dy(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%beg, 1, &
- dy(n + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the end only
-
- ! Send/receive buffer to/from bc_y%end/bc_y%end
- call MPI_SENDRECV( &
- dy(n - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 0, &
- dy(n + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- end if
- ! END: MPI Communication in y-direction ============================
-
- ! MPI Communication in z-direction =================================
- else
-
- if (pbc_loc == -1) then ! PBC at the beginning
-
- if (bc_z%end >= 0) then ! PBC at the beginning and end
-
- ! Send/receive buffer to/from bc_z%end/bc_z%beg
- call MPI_SENDRECV( &
- dz(p - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- dz(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the beginning only
-
- ! Send/receive buffer to/from bc_z%beg/bc_z%beg
- call MPI_SENDRECV( &
- dz(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- dz(-buff_size), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- else ! PBC at the end
-
- if (bc_z%beg >= 0) then ! PBC at the end and beginning
-
- ! Send/receive buffer to/from bc_z%beg/bc_z%end
- call MPI_SENDRECV( &
- dz(0), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%beg, 1, &
- dz(p + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- else ! PBC at the end only
-
- ! Send/receive buffer to/from bc_z%end/bc_z%end
- call MPI_SENDRECV( &
- dz(p - buff_size + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 0, &
- dz(p + 1), buff_size, &
- MPI_DOUBLE_PRECISION, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- end if
-
- end if
+ end subroutine s_mpi_sendrecv_ib_buffers
- end if
- ! END: MPI Communication in z-direction ============================
+ subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, bc_id_sfs, mpi_dir, pbc_loc)
-#endif
+ type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf
+ type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs
- end subroutine s_mpi_sendrecv_grid_variables_buffers
-
- !> The goal of this procedure is to populate the buffers of
- !! the cell-average conservative variables by communicating
- !! with the neighboring processors.
- !! @param q_cons_vf Cell-average conservative variables
- !! @param mpi_dir MPI communication coordinate direction
- !! @param pbc_loc Processor boundary condition (PBC) location
- subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, &
- pb, mv, &
- mpi_dir, &
- pbc_loc)
-
- type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
- real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv
integer, intent(in) :: mpi_dir, pbc_loc
- integer :: i, j, k, l, r, q !< Generic loop iterators
-
integer :: buffer_counts(1:3), buffer_count
type(int_bounds_info) :: boundary_conditions(1:3)
integer :: beg_end(1:2), grid_dims(1:3)
integer :: dst_proc, src_proc, recv_tag, send_tag
- logical :: beg_end_geq_0
-
integer :: pack_offset, unpack_offset
real(kind(0d0)), pointer :: p_send, p_recv
-#ifdef MFC_MPI
-
- call nvtxStartRange("RHS-COMM-PACKBUF")
- !$acc update device(v_size)
-
- if (qbmm .and. .not. polytropic) then
- buffer_counts = (/ &
- buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), &
- buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), &
- buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
- /)
- else
- buffer_counts = (/ &
- buff_size*sys_size*(n + 1)*(p + 1), &
- buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), &
- buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
- /)
- end if
-
- buffer_count = buffer_counts(mpi_dir)
- boundary_conditions = (/bc_x, bc_y, bc_z/)
- beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/)
- beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0
-
- ! Implements:
- ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc
- ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg]
- ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg]
- ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end]
- ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end]
-
- send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
- recv_tag = f_logical_to_int(pbc_loc == 1)
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
- dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
- src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
-
- grid_dims = (/m, n, p/)
+#ifdef MFC_MPI
- pack_offset = 0
- if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
- pack_offset = grid_dims(mpi_dir) - buff_size + 1
- end if
+ nVars = num_dims + 1
+ !$acc update device(nVars)
- unpack_offset = 0
- if (pbc_loc == 1) then
- unpack_offset = grid_dims(mpi_dir) + buff_size + 1
- end if
+ buffer_counts = (/ &
+ buff_size*nVars*(n + 1)*(p + 1), &
+ buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), &
+ buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
+ /)
- ! Pack Buffer to Send
- #:for mpi_dir in [1, 2, 3]
- if (mpi_dir == ${mpi_dir}$) then
- #:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = 1, sys_size
- r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l))
- q_cons_buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = sys_size + 1, sys_size + 4
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- (j + buff_size*(k + (n + 1)*l))
- q_cons_buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = sys_size + 1, sys_size + 4
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- (j + buff_size*(k + (n + 1)*l))
- q_cons_buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
- end if
- #:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, sys_size
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- r = (i - 1) + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- (k + buff_size*l))
- q_cons_buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- (k + buff_size*l))
- q_cons_buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- (k + buff_size*l))
- q_cons_buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
- end if
- #:else
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, sys_size
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- r = (i - 1) + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)*l))
- q_cons_buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset)
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)*l))
- q_cons_buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)*l))
- q_cons_buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q)
- end do
- end do
- end do
- end do
- end do
- end if
- #:endif
- end if
- #:endfor
- call nvtxEndRange ! Packbuf
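+ ! Pack the boundary region of the capillary variables into c_divs_buff_send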
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", 1, "nVars")], pack_v_size="nVars")
+ c_divs_buff_send(pack_idr) = c_divs_vf(i)%sf(sx, sy, sz)
+ #:endblock
- ! Send/Recv
+ p_send => c_divs_buff_send(0)
+ p_recv => c_divs_buff_recv(0)
#:for rdma_mpi in [False, True]
if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
- p_send => q_cons_buff_send(0)
- p_recv => q_cons_buff_recv(0)
#:if rdma_mpi
!$acc data attach(p_send, p_recv)
!$acc host_data use_device(p_send, p_recv)
+
call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
#:else
call nvtxStartRange("RHS-COMM-DEV2HOST")
- !$acc update host(q_cons_buff_send, ib_buff_send)
+ !$acc update host(c_divs_buff_send)
call nvtxEndRange
+
call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA")
#:endif
call MPI_SENDRECV( &
- p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
- p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
+ p_send, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, -pbc_loc), 0, &
+ p_recv, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, +pbc_loc), 0, &
MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
#:if rdma_mpi
@@ -1108,1243 +405,17 @@ contains
!$acc wait
#:else
call nvtxStartRange("RHS-COMM-HOST2DEV")
- !$acc update device(q_cons_buff_recv)
+ !$acc update device(c_divs_buff_recv)
call nvtxEndRange
#:endif
end if
#:endfor
- ! Unpack Received Buffer
- call nvtxStartRange("RHS-COMM-UNPACKBUF")
- #:for mpi_dir in [1, 2, 3]
- if (mpi_dir == ${mpi_dir}$) then
- #:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = -buff_size, -1
- do i = 1, sys_size
- r = (i - 1) + v_size* &
- (j + buff_size*((k + 1) + (n + 1)*l))
- q_cons_vf(i)%sf(j + unpack_offset, k, l) = q_cons_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = -buff_size, -1
- do i = sys_size + 1, sys_size + 4
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- (j + buff_size*((k + 1) + (n + 1)*l))
- pb(j + unpack_offset, k, l, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = -buff_size, -1
- do i = sys_size + 1, sys_size + 4
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- (j + buff_size*((k + 1) + (n + 1)*l))
- mv(j + unpack_offset, k, l, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
- end if
- #:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, sys_size
- do l = 0, p
- do k = -buff_size, -1
- do j = -buff_size, m + buff_size
- r = (i - 1) + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + buff_size*l))
- q_cons_vf(i)%sf(j, k + unpack_offset, l) = q_cons_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, p
- do k = -buff_size, -1
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + buff_size*l))
- pb(j, k + unpack_offset, l, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = 0, p
- do k = -buff_size, -1
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + buff_size*l))
- mv(j, k + unpack_offset, l, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
- end if
- #:else
- ! Unpacking buffer from bc_z%beg
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, sys_size
- do l = -buff_size, -1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- r = (i - 1) + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)* &
- (l + buff_size)))
- q_cons_vf(i)%sf(j, k, l + unpack_offset) = q_cons_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- if (qbmm .and. .not. polytropic) then
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = -buff_size, -1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)* &
- (l + buff_size)))
- pb(j, k, l + unpack_offset, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
-
- !$acc parallel loop collapse(5) gang vector default(present) private(r)
- do i = sys_size + 1, sys_size + 4
- do l = -buff_size, -1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- do q = 1, nb
- r = (i - 1) + (q - 1)*4 + nb*4 + v_size* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)* &
- (l + buff_size)))
- mv(j, k, l + unpack_offset, i - sys_size, q) = q_cons_buff_recv(r)
- end do
- end do
- end do
- end do
- end do
- end if
- #:endif
- end if
- #:endfor
- call nvtxEndRange
-
-#endif
-
- end subroutine s_mpi_sendrecv_variables_buffers
-
- !> The goal of this procedure is to populate the buffers of
- !! the cell-average conservative variables by communicating
- !! with the neighboring processors.
- subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers)
-
- type(integer_field), intent(inout) :: ib_markers
- integer, intent(in) :: gp_layers
-
- integer :: i, j, k, l, r !< Generic loop iterators
- integer, pointer, dimension(:) :: p_i_send, p_i_recv
-
-#ifdef MFC_MPI
-
- if (n > 0) then
- if (p > 0) then
- @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * &
- & (m + 2*gp_layers + 1)* &
- & (n + 2*gp_layers + 1)* &
- & (p + 2*gp_layers + 1)/ &
- & (min(m, n, p) + 2*gp_layers + 1)))
- else
- @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* &
- & (max(m, n) + 2*gp_layers + 1)))
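+ ! Unpack the received capillary variables into the buffer region, but only
+ ! where the stored boundary-condition type is non-negative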
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", 1, "nVars")], pack_v_size="nVars")
+ if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then
+ c_divs_vf(i)%sf(x, y, z) = c_divs_buff_recv(pack_idr)
end if
- else
- @:ALLOCATE(ib_buff_send(0:-1 + gp_layers))
- end if
- @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1)))
-
- !nCalls_time = nCalls_time + 1
-
- ! MPI Communication in x-direction =================================
- if (bc_x%beg >= 0) then ! PBC at the beginning
-
- if (bc_x%end >= 0) then ! PBC at the beginning and end
-
- ! Packing buffer to be sent to bc_x%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = m - gp_layers + 1, m
- r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 0, &
- p_i_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send, ib_buff_send)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the beginning only
-
- ! Packing buffer to be sent to bc_x%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = 0, gp_layers - 1
- r = (j + gp_layers*(k + (n + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 1, &
- p_i_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
- !$acc update host(ib_buff_send)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-#endif
-
- ! Unpacking buffer received from bc_x%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = -gp_layers, -1
- r = (j + gp_layers*((k + 1) + (n + 1)*l))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
-
- if (bc_x%end >= 0) then ! PBC at the end
-
- if (bc_x%beg >= 0) then ! PBC at the end and beginning
-
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- ! Packing buffer to be sent to bc_x%beg
- do l = 0, p
- do k = 0, n
- do j = 0, gp_layers - 1
- r = (j + gp_layers*(k + (n + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 1, &
- p_i_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the end only
-
- ! Packing buffer to be sent to bc_x%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = m - gp_layers + 1, m
- r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
- ! Send/receive buffer to/from bc_x%end/bc_x%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 0, &
- p_i_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(n + 1)*(p + 1), &
- MPI_INTEGER, bc_x%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-
- ! Unpacking buffer received from bc_x%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = m + 1, m + gp_layers
- r = ((j - m - 1) + gp_layers*(k + (n + 1)*l))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
- ! END: MPI Communication in x-direction ============================
-
- ! MPI Communication in y-direction =================================
-
- if (bc_y%beg >= 0) then ! PBC at the beginning
-
- if (bc_y%end >= 0) then ! PBC at the beginning and end
-
- ! Packing buffer to be sent to bc_y%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = n - gp_layers + 1, n
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k - n + gp_layers - 1) + gp_layers*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_y%end/bc_y%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 0, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_y%end/bc_y%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the beginning only
-
- ! Packing buffer to be sent to bc_y%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, gp_layers - 1
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- (k + gp_layers*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_y%beg/bc_y%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 1, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_y%beg/bc_y%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-#endif
-
- ! Unpacking buffer received from bc_y%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = -gp_layers, -1
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + gp_layers*l))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
-
- if (bc_y%end >= 0) then ! PBC at the end
-
- if (bc_y%beg >= 0) then ! PBC at the end and beginning
-
- ! Packing buffer to be sent to bc_y%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, gp_layers - 1
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- (k + gp_layers*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_y%beg/bc_y%end
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 1, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_y%beg/bc_y%end
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the end only
-
- ! Packing buffer to be sent to bc_y%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = n - gp_layers + 1, n
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k - n + gp_layers - 1) + gp_layers*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_y%end/bc_y%end
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 0, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_y%end/bc_y%end
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(p + 1), &
- MPI_INTEGER, bc_y%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-#endif
-
-                ! Unpacking buffer received from bc_y%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, p
- do k = n + 1, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k - n - 1) + gp_layers*l))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
- ! END: MPI Communication in y-direction ============================
-
- ! MPI Communication in z-direction =================================
- if (bc_z%beg >= 0) then ! PBC at the beginning
-
- if (bc_z%end >= 0) then ! PBC at the beginning and end
-
- ! Packing buffer to be sent to bc_z%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = p - gp_layers + 1, p
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)* &
- (l - p + gp_layers - 1)))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_z%end/bc_z%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 0, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_z%end/bc_z%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the beginning only
-
- ! Packing buffer to be sent to bc_z%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, gp_layers - 1
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_z%beg/bc_z%beg
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 1, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
-
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_z%beg/bc_z%beg
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 0, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-#endif
-
- ! Unpacking buffer from bc_z%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = -gp_layers, -1
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)* &
- (l + gp_layers)))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
-
- if (bc_z%end >= 0) then ! PBC at the end
-
- if (bc_z%beg >= 0) then ! PBC at the end and beginning
-
- ! Packing buffer to be sent to bc_z%beg
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = 0, gp_layers - 1
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)*l))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_z%beg/bc_z%end
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 1, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_z%beg/bc_z%end
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%beg, 1, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- else ! PBC at the end only
-
- ! Packing buffer to be sent to bc_z%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = p - gp_layers + 1, p
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)* &
- (l - p + gp_layers - 1)))
- ib_buff_send(r) = ib_markers%sf(j, k, l)
- end do
- end do
- end do
-
- !call MPI_Barrier(MPI_COMM_WORLD, ierr)
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi) then
- p_i_send => ib_buff_send
- p_i_recv => ib_buff_recv
-
- !$acc data attach(p_i_send, p_i_recv)
- !$acc host_data use_device(p_i_send, p_i_recv)
-
-                        ! Send/receive buffer to/from bc_z%end/bc_z%end
- call MPI_SENDRECV( &
- p_i_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 0, &
- p_i_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- !$acc end host_data
- !$acc end data
- !$acc wait
- else
-#endif
- !$acc update host(ib_buff_send)
-
-                    ! Send/receive buffer to/from bc_z%end/bc_z%end
- call MPI_SENDRECV( &
- ib_buff_send(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 0, &
- ib_buff_recv(0), &
- gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), &
- MPI_INTEGER, bc_z%end, 1, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
-#if defined(MFC_OpenACC)
- end if
-#endif
-
- end if
-
-#if defined(MFC_OpenACC)
- if (rdma_mpi .eqv. .false.) then
- !$acc update device(ib_buff_recv)
- end if
-#endif
-
- ! Unpacking buffer received from bc_z%end
- !$acc parallel loop collapse(3) gang vector default(present) private(r)
- do l = p + 1, p + gp_layers
- do k = -gp_layers, n + gp_layers
- do j = -gp_layers, m + gp_layers
- r = ((j + gp_layers) + (m + 2*gp_layers + 1)* &
- ((k + gp_layers) + (n + 2*gp_layers + 1)* &
- (l - p - 1)))
- ib_markers%sf(j, k, l) = ib_buff_recv(r)
- end do
- end do
- end do
-
- end if
-
- ! END: MPI Communication in z-direction ============================
-
-#endif
-
- end subroutine s_mpi_sendrecv_ib_buffers
-
- subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc)
-
- type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf
- integer, intent(in) :: mpi_dir, pbc_loc
-
- integer :: i, j, k, l, r, q !< Generic loop iterators
-
- integer :: buffer_counts(1:3), buffer_count
-
- type(int_bounds_info) :: boundary_conditions(1:3)
- integer :: beg_end(1:2), grid_dims(1:3)
- integer :: dst_proc, src_proc, recv_tag, send_tag
-
- logical :: beg_end_geq_0
-
- integer :: pack_offset, unpack_offset
- real(kind(0d0)), pointer :: p_send, p_recv
-
-#ifdef MFC_MPI
-
- nVars = num_dims + 1
- !$acc update device(nVars)
-
- buffer_counts = (/ &
- buff_size*nVars*(n + 1)*(p + 1), &
- buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), &
- buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) &
- /)
-
- buffer_count = buffer_counts(mpi_dir)
- boundary_conditions = (/bc_x, bc_y, bc_z/)
- beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/)
- beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0
-
- ! Implements:
- ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc
- ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg]
- ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg]
- ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end]
- ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end]
-
- send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1))
- recv_tag = f_logical_to_int(pbc_loc == 1)
-
- dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
- src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
-
- grid_dims = (/m, n, p/)
-
- pack_offset = 0
- if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
- pack_offset = grid_dims(mpi_dir) - buff_size + 1
- end if
-
- unpack_offset = 0
- if (pbc_loc == 1) then
- unpack_offset = grid_dims(mpi_dir) + buff_size + 1
- end if
-
- ! Pack Buffer to Send
- #:for mpi_dir in [1, 2, 3]
- if (mpi_dir == ${mpi_dir}$) then
- #:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = 0, buff_size - 1
- do i = 1, nVars
- r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l))
- c_divs_buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l)
- end do
- end do
- end do
- end do
-
- #:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, nVars
- do l = 0, p
- do k = 0, buff_size - 1
- do j = -buff_size, m + buff_size
- r = (i - 1) + nVars* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- (k + buff_size*l))
- c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l)
- end do
- end do
- end do
- end do
-
- #:else
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, nVars
- do l = 0, buff_size - 1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- r = (i - 1) + nVars* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)*l))
- c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset)
- end do
- end do
- end do
- end do
- #:endif
- end if
- #:endfor
-
- ! Send/Recv
- #:for rdma_mpi in [False, True]
- if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
- p_send => c_divs_buff_send(0)
- p_recv => c_divs_buff_recv(0)
-
- #:if rdma_mpi
- !$acc data attach(p_send, p_recv)
- !$acc host_data use_device(p_send, p_recv)
- #:else
- !$acc update host(c_divs_buff_send)
- #:endif
-
- call MPI_SENDRECV( &
- p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, &
- p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, &
- MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
-
- #:if rdma_mpi
- !$acc end host_data
- !$acc end data
- !$acc wait
- #:else
- !$acc update device(c_divs_buff_recv)
- #:endif
- end if
- #:endfor
-
- ! Unpack Received Buffer
- #:for mpi_dir in [1, 2, 3]
- if (mpi_dir == ${mpi_dir}$) then
- #:if mpi_dir == 1
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do l = 0, p
- do k = 0, n
- do j = -buff_size, -1
- do i = 1, nVars
- r = (i - 1) + nVars* &
- (j + buff_size*((k + 1) + (n + 1)*l))
- c_divs_vf(i)%sf(j + unpack_offset, k, l) = c_divs_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- #:elif mpi_dir == 2
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, nVars
- do l = 0, p
- do k = -buff_size, -1
- do j = -buff_size, m + buff_size
- r = (i - 1) + nVars* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + buff_size*l))
- c_divs_vf(i)%sf(j, k + unpack_offset, l) = c_divs_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- #:else
-                    ! Unpacking buffer received in the z-direction
- !$acc parallel loop collapse(4) gang vector default(present) private(r)
- do i = 1, nVars
- do l = -buff_size, -1
- do k = -buff_size, n + buff_size
- do j = -buff_size, m + buff_size
- r = (i - 1) + nVars* &
- ((j + buff_size) + (m + 2*buff_size + 1)* &
- ((k + buff_size) + (n + 2*buff_size + 1)* &
- (l + buff_size)))
- c_divs_vf(i)%sf(j, k, l + unpack_offset) = c_divs_buff_recv(r)
-#if defined(__INTEL_COMPILER)
- if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then
- print *, "Error", j, k, l, i
- error stop "NaN(s) in recv"
- end if
-#endif
- end do
- end do
- end do
- end do
-
- #:endif
- end if
- #:endfor
+ #:endblock
#endif
@@ -2363,8 +434,6 @@ contains
#ifdef MFC_MPI
- ! Deallocating q_cons_buff_send and q_cons_buff_recv
- @:DEALLOCATE(q_cons_buff_send, q_cons_buff_recv)
if (ib) then
@:DEALLOCATE(ib_buff_send, ib_buff_recv)
end if
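The tag and neighbor selection spelled out in the `Implements:` comment table of the removed `s_mpi_sendrecv_capilary_variables_buffers` can be restated compactly. The Python sketch below is illustrative only: `select_comm` is not part of the codebase, Python booleans stand in for the Fortran logicals, and the 1-based `beg_end` array becomes a 0-based tuple.

```python
def select_comm(pbc_loc, beg, end):
    # beg/end are the neighbor ranks from bc_<dir>%beg and bc_<dir>%end
    # (negative when that side is not a processor boundary).
    beg_end = (beg, end)
    # For pbc_loc == -1 this inspects 'end'; for pbc_loc == +1 it inspects 'beg',
    # i.e. whether the opposite side of this rank is also a processor boundary.
    beg_end_geq_0 = beg_end[max(pbc_loc, 0) - pbc_loc] >= 0

    send_tag = int(not ((pbc_loc == 1) ^ beg_end_geq_0))
    recv_tag = int(pbc_loc == 1)
    dst_proc = beg_end[int((pbc_loc == 1) ^ beg_end_geq_0)]
    src_proc = beg_end[int(pbc_loc == 1)]
    return send_tag, recv_tag, dst_proc, src_proc

# With hypothetical neighbor ranks 3 and 4, this reproduces the comment table:
print(select_comm(-1, beg=3, end=4))  # (0, 0, 4, 3): tags [0, 0], procs [end, beg]
print(select_comm(+1, beg=3, end=4))  # (1, 1, 3, 4): tags [1, 1], procs [beg, end]
```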
diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp
index 389847dd75..1d6c20f3fb 100644
--- a/src/simulation/m_rhs.fpp
+++ b/src/simulation/m_rhs.fpp
@@ -670,7 +670,7 @@ contains
call nvtxEndRange
call nvtxStartRange("RHS-COMMUNICATION")
- call s_populate_variables_buffers(q_prim_qp%vf, pb, mv)
+ call s_populate_prim_buffers(q_prim_qp%vf, pb, mv)
call nvtxEndRange
if (cfl_dt) then
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index 4ecb6127c0..cabc2f5de4 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -61,6 +61,8 @@ module m_start_up
use m_helper_basic !< Functions to compare floating point numbers
+ use m_boundary_conditions_common
+
#ifdef MFC_OpenACC
use openacc
#endif
@@ -78,6 +80,8 @@ module m_start_up
use m_surface_tension
use m_body_forces
+
+ use m_boundary_conditions
! ==========================================================================
implicit none
@@ -87,7 +91,7 @@ module m_start_up
s_read_data_files, &
s_read_serial_data_files, &
s_read_parallel_data_files, &
- s_populate_grid_variables_buffers, &
+ s_mpi_sendrecv_grid_spacing_buffers, &
s_initialize_internal_energy_equations, &
s_initialize_modules, s_initialize_gpu_vars, &
s_initialize_mpi_domain, s_finalize_modules, &
@@ -141,6 +145,7 @@ contains
teno_CT, mp_weno, weno_avg, &
riemann_solver, low_Mach, wave_speeds, avg_state, &
bc_x, bc_y, bc_z, &
+ num_bc_patches, patch_bc, &
x_domain, y_domain, z_domain, &
hypoelasticity, &
ib, num_ibs, patch_ib, &
@@ -252,6 +257,8 @@ contains
integer :: i, r !< Generic loop iterator
+ integer :: iter_dir, iter_loc
+
! Confirming that the directory from which the initial condition and
! the grid data files are to be read in exists and exiting otherwise
if (cfl_dt) then
@@ -269,6 +276,8 @@ contains
call s_mpi_abort(trim(file_path)//' is missing. Exiting ...')
end if
+ call s_read_boundary_condition_files(t_step_dir)
+
! Cell-boundary Locations in x-direction ===========================
file_path = trim(t_step_dir)//'/x_cb.dat'
@@ -502,6 +511,7 @@ contains
logical :: file_exist
character(len=10) :: t_step_start_string
+ character(len=path_len) :: step_dirpath
integer :: i, j
@@ -509,8 +519,10 @@ contains
allocate (y_cb_glb(-1:n_glb))
allocate (z_cb_glb(-1:p_glb))
+ step_dirpath = trim(case_dir)//'/restart_data'
+
! Read in cell boundary locations in x-direction
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//'x_cb.dat'
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//'x_cb.dat'
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -541,7 +553,7 @@ contains
if (n > 0) then
! Read in cell boundary locations in y-direction
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//'y_cb.dat'
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//'y_cb.dat'
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -584,6 +596,8 @@ contains
end if
end if
+ call s_read_boundary_condition_files(trim(step_dirpath))
+
if (file_per_process) then
if (cfl_dt) then
call s_int_to_str(n_start, t_step_start_string)
@@ -592,7 +606,7 @@ contains
call s_int_to_str(t_step_start, t_step_start_string)
write (file_loc, '(I0,A1,I7.7,A)') t_step_start, '_', proc_rank, '.dat'
end if
- file_loc = trim(case_dir)//'/restart_data/lustre_'//trim(t_step_start_string)//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//'/lustre_'//trim(t_step_start_string)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -653,7 +667,7 @@ contains
if (ib) then
! Read IB Markers
write (file_loc, '(A)') 'ib.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -673,7 +687,7 @@ contains
! Read Levelset
write (file_loc, '(A)') 'levelset.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -693,7 +707,7 @@ contains
! Read Levelset Norm
write (file_loc, '(A)') 'levelset_norm.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -724,7 +738,7 @@ contains
else
write (file_loc, '(I0,A)') t_step_start, '.dat'
end if
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -803,7 +817,7 @@ contains
! Read IB Markers
write (file_loc, '(A)') 'ib.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -823,7 +837,7 @@ contains
! Read Levelset
write (file_loc, '(A)') 'levelset.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -834,7 +848,7 @@ contains
call MPI_FILE_SET_VIEW(ifile, disp, MPI_DOUBLE_PRECISION, MPI_IO_levelset_DATA%view, &
'native', mpi_info_int, ierr)
- call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size, &
+ call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, &
MPI_DOUBLE_PRECISION, status, ierr)
else
@@ -843,7 +857,7 @@ contains
! Read Levelset Norm
write (file_loc, '(A)') 'levelset_norm.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -880,7 +894,7 @@ contains
allocate (airfoil_grid_l(1:Np))
write (file_loc, '(A)') 'airfoil_l.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -897,7 +911,7 @@ contains
end if
write (file_loc, '(A)') 'airfoil_u.dat'
- file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc)
+ file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc)
inquire (FILE=trim(file_loc), EXIST=file_exist)
if (file_exist) then
@@ -932,218 +946,6 @@ contains
end subroutine s_read_parallel_data_files
- !> The purpose of this subroutine is to populate the buffers
- !! of the grid variables, which are constituted of the cell-
- !! boundary locations and cell-width distributions, based on
- !! the boundary conditions.
- subroutine s_populate_grid_variables_buffers
-
- integer :: i !< Generic loop iterator
-
- ! Population of Buffers in x-direction =============================
-
- ! Populating cell-width distribution buffer, at the beginning of the
- ! coordinate direction, based on the selected boundary condition. In
- ! order, these are the ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (bc_x%beg <= -3) then
- do i = 1, buff_size
- dx(-i) = dx(0)
- end do
- elseif (bc_x%beg == -2) then
- do i = 1, buff_size
- dx(-i) = dx(i - 1)
- end do
- elseif (bc_x%beg == -1) then
- do i = 1, buff_size
- dx(-i) = dx(m - (i - 1))
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(1, -1)
- end if
-
- ! Computing the cell-boundary locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- x_cb(-1 - i) = x_cb(-i) - dx(-i)
- end do
- ! Computing the cell-center locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2d0
- end do
-
- ! Populating the cell-width distribution buffer, at the end of the
- ! coordinate direction, based on desired boundary condition. These
- ! include, in order, ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (bc_x%end <= -3) then
- do i = 1, buff_size
- dx(m + i) = dx(m)
- end do
- elseif (bc_x%end == -2) then
- do i = 1, buff_size
- dx(m + i) = dx(m - (i - 1))
- end do
- elseif (bc_x%end == -1) then
- do i = 1, buff_size
- dx(m + i) = dx(i - 1)
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(1, 1)
- end if
-
- ! Populating the cell-boundary locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)
- end do
- ! Populating the cell-center locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2d0
- end do
-
- ! END: Population of Buffers in x-direction ========================
-
- ! Population of Buffers in y-direction =============================
-
- ! Populating cell-width distribution buffer, at the beginning of the
- ! coordinate direction, based on the selected boundary condition. In
- ! order, these are the ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (n == 0) then
- return
- elseif (bc_y%beg <= -3 .and. bc_y%beg /= -14) then
- do i = 1, buff_size
- dy(-i) = dy(0)
- end do
- elseif (bc_y%beg == -2 .or. bc_y%beg == -14) then
- do i = 1, buff_size
- dy(-i) = dy(i - 1)
- end do
- elseif (bc_y%beg == -1) then
- do i = 1, buff_size
- dy(-i) = dy(n - (i - 1))
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(2, -1)
- end if
-
- ! Computing the cell-boundary locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- y_cb(-1 - i) = y_cb(-i) - dy(-i)
- end do
- ! Computing the cell-center locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2d0
- end do
-
- ! Populating the cell-width distribution buffer, at the end of the
- ! coordinate direction, based on desired boundary condition. These
- ! include, in order, ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (bc_y%end <= -3) then
- do i = 1, buff_size
- dy(n + i) = dy(n)
- end do
- elseif (bc_y%end == -2) then
- do i = 1, buff_size
- dy(n + i) = dy(n - (i - 1))
- end do
- elseif (bc_y%end == -1) then
- do i = 1, buff_size
- dy(n + i) = dy(i - 1)
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(2, 1)
- end if
-
- ! Populating the cell-boundary locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i)
- end do
- ! Populating the cell-center locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2d0
- end do
-
- ! END: Population of Buffers in y-direction ========================
-
- ! Population of Buffers in z-direction =============================
-
- ! Populating cell-width distribution buffer, at the beginning of the
- ! coordinate direction, based on the selected boundary condition. In
- ! order, these are the ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (p == 0) then
- return
- elseif (bc_z%beg <= -3) then
- do i = 1, buff_size
- dz(-i) = dz(0)
- end do
- elseif (bc_z%beg == -2) then
- do i = 1, buff_size
- dz(-i) = dz(i - 1)
- end do
- elseif (bc_z%beg == -1) then
- do i = 1, buff_size
- dz(-i) = dz(p - (i - 1))
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(3, -1)
- end if
-
- ! Computing the cell-boundary locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- z_cb(-1 - i) = z_cb(-i) - dz(-i)
- end do
- ! Computing the cell-center locations buffer, at the beginning of
- ! the coordinate direction, from the cell-width distribution buffer
- do i = 1, buff_size
- z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2d0
- end do
-
- ! Populating the cell-width distribution buffer, at the end of the
- ! coordinate direction, based on desired boundary condition. These
- ! include, in order, ghost-cell extrapolation, symmetry, periodic,
- ! and processor boundary conditions.
- if (bc_z%end <= -3) then
- do i = 1, buff_size
- dz(p + i) = dz(p)
- end do
- elseif (bc_z%end == -2) then
- do i = 1, buff_size
- dz(p + i) = dz(p - (i - 1))
- end do
- elseif (bc_z%end == -1) then
- do i = 1, buff_size
- dz(p + i) = dz(i - 1)
- end do
- else
- call s_mpi_sendrecv_grid_variables_buffers(3, 1)
- end if
-
- ! Populating the cell-boundary locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i)
- end do
- ! Populating the cell-center locations buffer, at the end of the
- ! coordinate direction, from buffer of the cell-width distribution
- do i = 1, buff_size
- z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2d0
- end do
-
- ! END: Population of Buffers in z-direction ========================
-
- end subroutine s_populate_grid_variables_buffers
-
!> The purpose of this procedure is to initialize the
!! values of the internal-energy equations of each phase
!! from the mass of each phase, the mixture momentum and
@@ -1398,6 +1200,7 @@ contains
subroutine s_initialize_modules
call s_initialize_global_parameters_module()
+
!Quadrature weights and nodes for polydisperse simulations
if (bubbles .and. nb > 1 .and. R0_type == 1) then
call s_simpson
@@ -1417,6 +1220,7 @@ contains
call acc_present_dump()
#endif
+ call s_initialize_mpi_common_module()
call s_initialize_mpi_proxy_module()
call s_initialize_variables_conversion_module()
if (grid_geometry == 3) call s_initialize_fftw_module()
@@ -1458,6 +1262,8 @@ contains
call acc_present_dump()
#endif
+ call s_initialize_boundary_conditions_module()
+
! Reading in the user provided initial condition and grid data
call s_read_data_files(q_cons_ts(1)%vf)
@@ -1467,7 +1273,7 @@ contains
if (acoustic_source) call s_precalculate_acoustic_spatial_sources()
! Populating the buffers of the grid variables using the boundary conditions
- call s_populate_grid_variables_buffers()
+ call s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs)
! Computation of parameters, allocation of memory, association of pointers,
! and/or execution of any other tasks that are needed to properly configure
@@ -1573,11 +1379,10 @@ contains
!$acc update device(sigma, surface_tension)
!$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc)
-
- !$acc update device(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3)
- !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3)
- !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3)
+ !$acc update device(bc_x%vel_beg, bc_x%vel_end)
+ !$acc update device(bc_y%vel_beg, bc_y%vel_end)
+ !$acc update device(bc_z%vel_beg, bc_z%vel_end)
!$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out)
!$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out)
!$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out)
@@ -1605,6 +1410,7 @@ contains
call s_finalize_variables_conversion_module()
if (grid_geometry == 3) call s_finalize_fftw_module
call s_finalize_mpi_proxy_module()
+ call s_finalize_mpi_common_module()
call s_finalize_global_parameters_module()
if (relax) call s_finalize_relaxation_solver_module()
if (viscous) then
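The removed `s_populate_grid_variables_buffers` filled the ghost-cell grid spacing by hand according to the boundary-condition type; the simulation now defers to `s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs)`. As a reference for what that fill did, here is a minimal Python sketch of the beginning-of-domain, x-direction branch only (the dict-indexed arrays and the function name are illustrative, not solver code):

```python
def fill_x_beg_buffers(dx, x_cb, x_cc, m, buff_size, bc_beg):
    # dx, x_cb, x_cc: dicts keyed by (possibly negative) cell index, standing in
    # for the Fortran arrays dimensioned over -buff_size : m + buff_size.
    if bc_beg >= 0:
        raise NotImplementedError("processor boundary: handled by the MPI exchange")
    for i in range(1, buff_size + 1):
        if bc_beg <= -3:               # ghost-cell extrapolation
            dx[-i] = dx[0]
        elif bc_beg == -2:             # symmetry
            dx[-i] = dx[i - 1]
        else:                          # bc_beg == -1: periodic on a single rank
            dx[-i] = dx[m - (i - 1)]
    for i in range(1, buff_size + 1):  # cell-boundary locations from cell widths
        x_cb[-1 - i] = x_cb[-i] - dx[-i]
    for i in range(1, buff_size + 1):  # cell-center locations from cell widths
        x_cc[-i] = x_cc[1 - i] - (dx[1 - i] + dx[-i]) / 2.0
```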
diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp
index afe909ca52..75d0b91d14 100644
--- a/src/simulation/m_viscous.fpp
+++ b/src/simulation/m_viscous.fpp
@@ -16,6 +16,8 @@ module m_viscous
use m_helper
use m_finite_differences
+
+ use m_boundary_conditions_common
! ==========================================================================
private; public s_get_viscous, &
@@ -1303,7 +1305,8 @@ contains
type(scalar_field), intent(inout) :: grad_z
type(int_bounds_info) :: ix, iy, iz
- integer :: j, k, l !< Generic loop iterators
+ @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS()
+ integer :: iter_loc
ix%beg = 1 - buff_size; ix%end = m + buff_size - 1
if (n > 0) then
@@ -1397,66 +1400,31 @@ contains
end if
end if
- if (bc_x%beg <= -3) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do l = idwbuff(3)%beg, idwbuff(3)%end
- do k = idwbuff(2)%beg, idwbuff(2)%end
- grad_x%sf(0, k, l) = (-3d0*var%sf(0, k, l) + 4d0*var%sf(1, k, l) - var%sf(2, k, l))/ &
- (x_cc(2) - x_cc(0))
- end do
- end do
- end if
- if (bc_x%end <= -3) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do l = idwbuff(3)%beg, idwbuff(3)%end
- do k = idwbuff(2)%beg, idwbuff(2)%end
- grad_x%sf(m, k, l) = (3d0*var%sf(m, k, l) - 4d0*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ &
- (x_cc(m) - x_cc(m - 2))
- end do
- end do
- end if
- if (n > 0) then
- if (bc_y%beg <= -3 .and. bc_y%beg /= -13) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do l = idwbuff(3)%beg, idwbuff(3)%end
- do j = idwbuff(1)%beg, idwbuff(1)%end
- grad_y%sf(j, 0, l) = (-3d0*var%sf(j, 0, l) + 4d0*var%sf(j, 1, l) - var%sf(j, 2, l))/ &
- (y_cc(2) - y_cc(0))
- end do
- end do
- end if
- if (bc_y%end <= -3) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do l = idwbuff(3)%beg, idwbuff(3)%end
- do j = idwbuff(1)%beg, idwbuff(1)%end
- grad_y%sf(j, n, l) = (3d0*var%sf(j, n, l) - 4d0*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ &
- (y_cc(n) - y_cc(n - 2))
- end do
- end do
+ #:for dir, cmp in zip([1, 2, 3], ['x', 'y', 'z'])
+ if (${dir}$ <= num_dims .and. any(bc_id_has_bc(${dir}$, -1, :-3))) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc="-1", thickness=1, pack_v_size=1)
+                    if (bc_id_sfs(${dir}$, -1)%sf(exlhs, eylhs, ezlhs)%type <= -3 .and. bc_id_sfs(${dir}$, -1)%sf(exlhs, eylhs, ezlhs)%type /= -13) then
+ grad_${cmp}$%sf(ex, ey, ez) = &
+ (iter_loc*3d0*var%sf(ex, ey, ez) - &
+ iter_loc*4d0*var%sf(ex - locx, ey - locy, ez - locz) + &
+ iter_loc*var%sf(ex - 2*locx, ey - 2*locy, ez - 2*locz))/ &
+ (${cmp}$_cc(2) - ${cmp}$_cc(0))
+ end if
+ #:endblock
end if
- if (p > 0) then
- if (bc_z%beg <= -3) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do k = idwbuff(2)%beg, idwbuff(2)%end
- do j = idwbuff(1)%beg, idwbuff(1)%end
- grad_z%sf(j, k, 0) = &
- (-3d0*var%sf(j, k, 0) + 4d0*var%sf(j, k, 1) - var%sf(j, k, 2))/ &
- (z_cc(2) - z_cc(0))
- end do
- end do
- end if
- if (bc_z%end <= -3) then
- !$acc parallel loop collapse(2) gang vector default(present)
- do k = idwbuff(2)%beg, idwbuff(2)%end
- do j = idwbuff(1)%beg, idwbuff(1)%end
- grad_z%sf(j, k, p) = &
- (3d0*var%sf(j, k, p) - 4d0*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ &
- (z_cc(p) - z_cc(p - 2))
- end do
- end do
- end if
+
+ if (${dir}$ <= num_dims .and. any(bc_id_has_bc(${dir}$, +1, :-3))) then
+ #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc="+1", thickness=1, pack_v_size=1)
+                    if (bc_id_sfs(${dir}$, +1)%sf(exlhs, eylhs, ezlhs)%type <= -3 .and. bc_id_sfs(${dir}$, +1)%sf(exlhs, eylhs, ezlhs)%type /= -13) then
+ grad_${cmp}$%sf(ex, ey, ez) = &
+ (-iter_loc*3d0*var%sf(ex, ey, ez) + &
+ iter_loc*4d0*var%sf(ex - locx, ey - locy, ez - locz) - &
+ iter_loc*var%sf(ex - 2*locx, ey - 2*locy, ez - 2*locz))/ &
+ (${cmp}$_cc(e${cmp}$) - ${cmp}$_cc(e${cmp}$-2))
+ end if
+ #:endblock
end if
- end if
+ #:endfor
end subroutine s_compute_fd_gradient
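The fypp block above folds the removed per-direction branches into one template without changing the stencil. For reference, the second-order one-sided differences it applies at the domain faces, as a small Python sketch (helper names and 0-based indexing are illustrative):

```python
def one_sided_grad_beg(var, x_cc):
    # Stencil at the first interior cell, as in the removed loops:
    # (-3*f0 + 4*f1 - f2) / (x2 - x0)
    return (-3.0*var[0] + 4.0*var[1] - var[2]) / (x_cc[2] - x_cc[0])

def one_sided_grad_end(var, x_cc, m):
    # Mirrored stencil at the last interior cell m:
    # (3*fm - 4*f(m-1) + f(m-2)) / (xm - x(m-2))
    return (3.0*var[m] - 4.0*var[m - 1] + var[m - 2]) / (x_cc[m] - x_cc[m - 2])
```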
diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp
index 38f659a8e9..962cab30cb 100644
--- a/src/simulation/m_weno.fpp
+++ b/src/simulation/m_weno.fpp
@@ -96,10 +96,6 @@ module m_weno
! END: WENO Coefficients ===================================================
- integer :: v_size !< Number of WENO-reconstructed cell-average variables
-
- !$acc declare create(v_size)
-
!> @name Indical bounds in the s1-, s2- and s3-directions
!> @{
type(int_bounds_info) :: is1_weno, is2_weno, is3_weno
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index cad27120a0..275fa295fe 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -82,6 +82,8 @@ def add_common_arguments(p, mask = None):
test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.")
test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.")
test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
+ test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.")
+
test_meg = test.add_mutually_exclusive_group()
test_meg.add_argument("--generate", action="store_true", default=False, help="(Test Generation) Generate golden files.")
test_meg.add_argument("--add-new-variables", action="store_true", default=False, help="(Test Generation) If new variables are found in D/ when running tests, add them to the golden files.")
diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py
index 5749bd1727..c091edbf6e 100644
--- a/toolchain/mfc/run/case_dicts.py
+++ b/toolchain/mfc/run/case_dicts.py
@@ -55,8 +55,28 @@ def analytic(self):
'cfl_const_dt': ParamType.LOG,
'chemistry': ParamType.LOG,
'cantera_file': ParamType.STR,
+ 'num_bc_patches': ParamType.INT,
}
+for i in range(100):
+ COMMON[f"patch_bc({i+1})%type"] = ParamType.INT
+ COMMON[f"patch_bc({i+1})%geometry"] = ParamType.INT
+ COMMON[f"patch_bc({i+1})%dir"] = ParamType.INT
+ COMMON[f"patch_bc({i+1})%loc"] = ParamType.INT
+ COMMON[f"patch_bc({i+1})%radius"] = ParamType.REAL
+
+ for j in range(1, 3+1):
+ COMMON[f"patch_bc({i+1})%centroid({j})"] = ParamType.REAL
+ COMMON[f"patch_bc({i+1})%length({j})"] = ParamType.REAL
+ COMMON[f"patch_bc({i+1})%vel({j})"] = ParamType.REAL
+
+for cmp in ["x", "y", "z"]:
+ for loc in ["beg", "end"]:
+ COMMON[f'bc_{cmp}%{loc}'] = ParamType.INT
+
+ for dir in range(1, 3+1):
+ COMMON[f'bc_{cmp}%vel_{loc}({dir})'] = ParamType.REAL
+
PRE_PROCESS = COMMON.copy()
PRE_PROCESS.update({
'old_grid': ParamType.LOG,
@@ -249,14 +269,6 @@ def analytic(self):
SIMULATION[f'patch_ib({ib_id})%length_{cmp}'] = ParamType.REAL
for cmp in ["x", "y", "z"]:
- SIMULATION[f'bc_{cmp}%beg'] = ParamType.INT
- SIMULATION[f'bc_{cmp}%end'] = ParamType.INT
- SIMULATION[f'bc_{cmp}%vb1'] = ParamType.REAL
- SIMULATION[f'bc_{cmp}%vb2'] = ParamType.REAL
- SIMULATION[f'bc_{cmp}%vb3'] = ParamType.REAL
- SIMULATION[f'bc_{cmp}%ve1'] = ParamType.REAL
- SIMULATION[f'bc_{cmp}%ve2'] = ParamType.REAL
- SIMULATION[f'bc_{cmp}%ve3'] = ParamType.REAL
SIMULATION[f'bc_{cmp}%pres_in'] = ParamType.REAL
SIMULATION[f'bc_{cmp}%pres_out'] = ParamType.REAL
SIMULATION[f'bc_{cmp}%grcbc_in'] = ParamType.LOG
@@ -275,7 +287,6 @@ def analytic(self):
SIMULATION[f'{var}_{cmp}'] = ParamType.REAL
SIMULATION[f'bf_{cmp}'] = ParamType.LOG
-
for prepend in ["domain%beg", "domain%end"]:
SIMULATION[f"{cmp}_{prepend}"] = ParamType.REAL
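To make the newly registered keys above concrete, a hypothetical excerpt of a case-parameter dictionary could read as follows; the values and the single patch shown are placeholders, not a validated case setup:

```python
case_params = {
    'num_bc_patches': 1,
    # One boundary-condition patch on the beginning x-face (all values placeholders):
    'patch_bc(1)%type': -2,
    'patch_bc(1)%dir': 1,            # x-direction
    'patch_bc(1)%loc': -1,           # beginning face
    'patch_bc(1)%geometry': 1,
    'patch_bc(1)%centroid(2)': 0.5,
    'patch_bc(1)%length(2)': 0.25,
    'patch_bc(1)%vel(1)': 0.0,
    # Domain-wide boundary conditions and the renamed boundary velocities:
    'bc_x%beg': -2, 'bc_x%end': -2,
    'bc_x%vel_beg(1)': 0.0,
    'bc_x%vel_end(1)': 0.0,
}
```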
diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py
index beedb639d4..0b84f83227 100644
--- a/toolchain/mfc/test/test.py
+++ b/toolchain/mfc/test/test.py
@@ -1,5 +1,5 @@
import os, typing, shutil, time, itertools
-from random import sample
+from random import sample, seed
import rich, rich.table
@@ -55,6 +55,7 @@ def __filter(cases_) -> typing.List[TestCase]:
if ARG("percent") == 100:
return cases
+ seed(time.time())
return sample(cases, k=int(len(cases)*ARG("percent")/100.0))
@@ -140,6 +141,10 @@ def _handle_case(case: TestCase, devices: typing.Set[int]):
case.delete_output()
case.create_directory()
+ if ARG("dry_run"):
+ cons.print(f" [bold magenta]{case.get_uuid()}[/bold magenta] SKIP {case.trace}")
+ return
+
cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices)
out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt")
@@ -224,7 +229,10 @@ def handle_case(case: TestCase, devices: typing.Set[int]):
try:
_handle_case(case, devices)
- nPASS += 1
+ if ARG("dry_run"):
+ nSKIP += 1
+ else:
+ nPASS += 1
except Exception as exc:
if nAttempts < ARG("max_attempts"):
cons.print(f"[bold yellow] Attempt {nAttempts}: Failed test {case.get_uuid()}. Retrying...[/bold yellow]")
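Taken together, the `test.py` changes make `--dry-run` stop after case generation and count the case as skipped rather than passed. A condensed sketch of that control flow (the `run_case` helper and the counter dictionary are illustrative, not the toolchain's actual structure):

```python
def handle(case, dry_run, counters):
    case.delete_output()
    case.create_directory()
    if dry_run:
        counters["SKIP"] += 1   # case files exist on disk, but nothing is executed
        return
    run_case(case)              # pre_process + simulation, then golden-file checks
    counters["PASS"] += 1
```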