From c393a6071df1bd16bf24c50fe4fd04085ab59422 Mon Sep 17 00:00:00 2001 From: Henry LE BERRE Date: Tue, 15 Oct 2024 00:20:19 -0400 Subject: [PATCH] (Atomic) Boundary Condition Patches --- .github/workflows/frontier/build.sh | 2 +- .github/workflows/phoenix/test.sh | 2 +- .github/workflows/test.yml | 2 +- CMakeLists.txt | 2 +- docs/documentation/case.md | 43 +- examples/1D_inert_shocktube/export.py | 30 + examples/1D_reactive_shocktube/export.py | 30 + examples/hypotests/case.py | 130 + src/common/include/macros.fpp | 158 ++ src/common/m_boundary_conditions_common.fpp | 484 ++++ src/common/m_checker_common.fpp | 40 +- src/common/m_constants.fpp | 4 + src/common/m_derived_types.fpp | 33 +- src/common/m_global_parameters_common.fpp | 27 + src/common/m_helper.fpp | 30 +- src/common/m_mpi_common.fpp | 787 ++++++ src/{simulation => common}/m_nvtx.f90 | 0 src/common/m_variables_conversion.fpp | 4 +- .../{m_data_input.f90 => m_data_input.fpp} | 648 +---- src/post_process/m_derived_variables.fpp | 1 + src/post_process/m_global_parameters.fpp | 24 +- src/post_process/m_mpi_proxy.fpp | 1208 ---------- src/post_process/m_start_up.f90 | 26 +- src/pre_process/m_compute_levelset.fpp | 12 +- src/pre_process/m_data_output.fpp | 9 + src/pre_process/m_global_parameters.fpp | 24 +- src/pre_process/m_initial_condition.fpp | 6 +- src/pre_process/m_mpi_proxy.fpp | 436 +--- src/pre_process/m_start_up.fpp | 13 +- src/simulation/m_boundary_conditions.fpp | 1633 +------------ src/simulation/m_global_parameters.fpp | 26 +- src/simulation/m_ibm.fpp | 14 +- src/simulation/m_mpi_proxy.fpp | 2143 +---------------- src/simulation/m_rhs.fpp | 2 +- src/simulation/m_start_up.fpp | 268 +-- src/simulation/m_viscous.fpp | 86 +- src/simulation/m_weno.fpp | 4 - toolchain/mfc/args.py | 2 + toolchain/mfc/run/case_dicts.py | 29 +- toolchain/mfc/test/test.py | 12 +- 40 files changed, 2115 insertions(+), 6319 deletions(-) create mode 100644 examples/1D_inert_shocktube/export.py create mode 100644 examples/1D_reactive_shocktube/export.py create mode 100644 examples/hypotests/case.py create mode 100644 src/common/m_boundary_conditions_common.fpp create mode 100644 src/common/m_global_parameters_common.fpp rename src/{simulation => common}/m_nvtx.f90 (100%) rename src/post_process/{m_data_input.f90 => m_data_input.fpp} (53%) diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index a6a51b65f..65d93d537 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -1,4 +1,4 @@ #!/bin/bash . ./mfc.sh load -c f -m g -./mfc.sh build -j 8 --gpu +./mfc.sh test --dry-run -j 8 --gpu diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index 5cdc57e78..e89af4721 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -5,7 +5,7 @@ if [ "$job_device" == "gpu" ]; then build_opts="--gpu" fi -./mfc.sh build -j 8 $build_opts +./mfc.sh test --dry-run -j 8 $build_opts n_test_threads=8 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1be80f5c7..b312197e0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,7 +84,7 @@ jobs: - name: Build run: | if [ '${{ matrix.intel }}' == 'true' ]; then . 
/opt/intel/oneapi/setvars.sh; fi - /bin/bash mfc.sh build -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} + /bin/bash mfc.sh test --dry-run -j $(nproc) --${{ matrix.debug }} --${{ matrix.mpi }} - name: Test run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index de84d7915..8a8e19120 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -335,7 +335,7 @@ macro(HANDLE_SOURCES target useCommon) add_custom_command( OUTPUT ${f90} - COMMAND ${FYPP_EXE} -m re + COMMAND ${FYPP_EXE} -m re -m itertools -I "${CMAKE_BINARY_DIR}/include/${target}" -I "${${target}_DIR}/include" -I "${common_DIR}/include" diff --git a/docs/documentation/case.md b/docs/documentation/case.md index d9461453e..86f9afb46 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -337,8 +337,8 @@ Details of implementation of viscosity in MFC can be found in [Coralic (2015)](r | Parameter | Type | Description | | ---: | :----: | :--- | | `bc_[x,y,z]%%beg[end]` | Integer | Beginning [ending] boundary condition in the $[x,y,z]$-direction (negative integer, see table [Boundary Conditions](#boundary-conditions)) | -| `bc_[x,y,z]%%vb[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%beg` | -| `bc_[x,y,z]%%ve[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%end` | +| `bc_[x,y,z]%%vel_beg[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%beg` | +| `bc_[x,y,z]%%vel_end[1,2,3]`‡| Real | Velocity in the (x,1), (y, 2), (z,3) direction applied to `bc_[x,y,z]%%end` | | `model_eqns` | Integer | Multicomponent model: [1] $\Gamma/\Pi_\infty$; [2] 5-equation; [3] 6-equation; [4] 4-equation | | `alt_soundspeed` * | Logical | Alternate sound speed and $K \nabla \cdot u$ for 5-equation model | | `adv_n` | Logical | Solving directly for the number density (in the method of classes) and compute void fraction from the number density | @@ -463,7 +463,7 @@ The value of `dt` needs to be sufficiently small to satisfy the Courant-Friedric To newly start the simulation, set `t_step_start = 0`. To restart the simulation from $k$-th time step, set `t_step_start = k`; see [Restarting Cases](running.md#restarting-cases). -##### Adaptive Time-Stepping +##### Adaptive Time-Stepping (optional) - `cfl_adap_dt` enables adaptive time stepping with a constant CFL when true @@ -480,6 +480,36 @@ To restart the simulation from $k$-th time step, set `t_step_start = k`; see [Re To newly start the simulation, set `n_start = 0`. To restart the simulation from $k$-th time step, see [Restarting Cases](running.md#restarting-cases). +##### Boundary Condition Patches (optional and experimental) + +> [!WARNING] +> This feature is currently experimental and may not produce physicall correct +> results when certain other features are turned on. + +Boundary condition patches allow you to define boundary conditions with more granularity +than `bc_[x,y,z]`. When using this feature, any point along the edge of the domain can +be assigned its own boundary condition type. Since the boundaries of a 3D domain are +2D surfaces, the concept of patches is re-introduced. + +Boundary conditions are applied using the [Painter's algorithm](https://en.wikipedia.org/wiki/Painter%27s_algorithm) +where patches with higher indices take priority, in layers. The lowest priority is given +to `bc_[x,y,z]`. This feature is opt-in and enabled by assigning `num_bc_patches` to a +positive value. 
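+
+For example, the `examples/hypotests/case.py` case added by this patch paints a single
+velocity-inlet patch over part of the `bc_x%beg` face. A minimal sketch of the relevant
+case-dictionary keys (values taken from that example; all other required case keys are
+omitted here) is:
+
+```python
+import json
+
+# Only the boundary-condition-related keys are shown.
+print(json.dumps({
+    # Domain-wide boundary conditions (lowest-priority layer)
+    'bc_x%beg': -3, 'bc_x%end': -3,
+    'bc_y%beg': -3, 'bc_y%end': -3,
+
+    # One boundary condition patch painted on top of the bc_x%beg face
+    'num_bc_patches': 1,
+    'patch_bc(1)%type': -17,         # velocity inlet
+    'patch_bc(1)%dir': 1,            # about the x-axis
+    'patch_bc(1)%loc': -1,           # at the beginning of that axis
+    'patch_bc(1)%geometry': 1,       # cuboid
+    'patch_bc(1)%centroid(2)': 0.5,  # index i = dir is always ignored
+    'patch_bc(1)%length(2)': 0.26,
+    'patch_bc(1)%vel(1)': 10,
+    'patch_bc(1)%vel(2)': 0,
+}))
+```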
+ +| Parameter | Type | Description | +| ---: | :----: | :--- | +| `num_bc_patches` | Integer | Number of boundary condition patches (default 0) | +| `%%type` * | Integer | Boundary condition type (negative integer, see table [Boundary Conditions](#boundary-conditions)) | +| `%%dir` * | Integer | About the [1] x; [2] y; [3] z; axis | +| `%%loc` * | Integer | About the line or surface at the [-1] beginning; [+2] end; of the `%%dir` axis. | +| `%%geometry` * | Integer | The geometry of the boundary condition patch. See table [Boundary Condition Patch Geometry Types](#boundary-condition-patch-geometry-types) | +| `%%vel(i)` * | Real | Meaning depends on the boundary condition | +| `%%centroid(i)` * | Real | Meaning depends on the patch geometry. Index $i = \text{dir}$ is always ignored | +| `%%length(i)` * | Real | Meaning depends on the patch geometry. Index $i = \text{dir}$ is always ignored | +| `%%radius` * | Real | Meaning depends on the patch geometry. Index $i = \text{dir}$ is always ignored | + +*: These parameters should be prepended with `patch_bc(j)%` where $j$ is the boundary condition patch index. + ### 7. Formatted Output | Parameter | Type | Description | @@ -862,6 +892,13 @@ This includes types exclusive to one-, two-, and three-dimensional problems. The patch type number (`#`) corresponds to the input value in `input.py` labeled `patch_icpp(j)%%geometry` where $j$ is the patch index. Each patch requires a different set of parameters, which are also listed in this table. +### Boundary Condition Patch Geometry Types + +| # | Name | Dim. | Requirements | +| ---: | :----: | :--- | :--- | +| 1 | Cuboid | 1 & 2 | `%%centroid(1:3)` and `%%length(1:3)` | +| 2 | Spheroid | 1 & 2 | `%%centroid(1:3)` and `%%radius` | + ### Immersed Boundary Patch Types | # | Name | Dim. 
| diff --git a/examples/1D_inert_shocktube/export.py b/examples/1D_inert_shocktube/export.py new file mode 100644 index 000000000..e676b6ced --- /dev/null +++ b/examples/1D_inert_shocktube/export.py @@ -0,0 +1,30 @@ +import csv +import numpy as np +import statistics +from tqdm import tqdm + +import mfc.viz +from case import dt, sol_L as sol + + +case = mfc.viz.Case(".", dt) + +for name in tqdm(sol.species_names, desc="Loading Variables"): + case.load_variable(f"Y_{name}", f"prim.{5 + sol.species_index(name)}") +case.load_variable("rho", "prim.1") +case.load_variable("u", "prim.2") +case.load_variable("p", "prim.3") +case.load_variable("T", "prim.15") + +steps = case.get_timesteps() + +for step in [min(steps), max(steps)]: + t = step * dt + + with open(f"mfc-{step}.csv", "w") as f: + writer = csv.writer(f) + keys = ['x'] + list(set(case.get_data()[0].keys()) - set(["x"])) + writer.writerow(keys) + for ix, x in enumerate(sorted(case.get_coords()[0])): + row = [case.get_data()[step][key][ix] for key in keys] + writer.writerow(row) diff --git a/examples/1D_reactive_shocktube/export.py b/examples/1D_reactive_shocktube/export.py new file mode 100644 index 000000000..e676b6ced --- /dev/null +++ b/examples/1D_reactive_shocktube/export.py @@ -0,0 +1,30 @@ +import csv +import numpy as np +import statistics +from tqdm import tqdm + +import mfc.viz +from case import dt, sol_L as sol + + +case = mfc.viz.Case(".", dt) + +for name in tqdm(sol.species_names, desc="Loading Variables"): + case.load_variable(f"Y_{name}", f"prim.{5 + sol.species_index(name)}") +case.load_variable("rho", "prim.1") +case.load_variable("u", "prim.2") +case.load_variable("p", "prim.3") +case.load_variable("T", "prim.15") + +steps = case.get_timesteps() + +for step in [min(steps), max(steps)]: + t = step * dt + + with open(f"mfc-{step}.csv", "w") as f: + writer = csv.writer(f) + keys = ['x'] + list(set(case.get_data()[0].keys()) - set(["x"])) + writer.writerow(keys) + for ix, x in enumerate(sorted(case.get_coords()[0])): + row = [case.get_data()[step][key][ix] for key in keys] + writer.writerow(row) diff --git a/examples/hypotests/case.py b/examples/hypotests/case.py new file mode 100644 index 000000000..badaa3915 --- /dev/null +++ b/examples/hypotests/case.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +import math +import json + +# Numerical setup +Nx = 201 # Number of grid points in x +Ny = 201 # Number of grid points in y +dx = 1./(1.*(Nx+1)) # Grid spacing in x +dy = 1./(1.*(Ny+1)) # Grid spacing in y + +Tend = 64E-06 # End time +Nt = 2000 # 2000 # Number of time steps +mydt = Tend/(1.*Nt) # Time step size + +# Configuring case dictionary +print(json.dumps({ + # Logistics ================================================ + 'run_time_info' : 'F', + # ========================================================== + + # Computational Domain Parameters ========================== + 'x_domain%beg' : 0.E+00, # x start + 'x_domain%end' : 2.E+00, # x end + 'y_domain%beg' : 0.E+00, # y start + 'y_domain%end' : 1.E+00, # y end + 'm' : Nx, # Number of grid points in x direction + 'n' : Ny, # Number of grid points in y direction + 'p' : 0, # Number of grid points in z (for 3D, change this) + 'dt' : 1e-6, # Time step size + 't_step_start' : 0, # Start time + 't_step_stop' : Nt, # End time + 't_step_save' : 500, # Save frequency + # ========================================================== + + # Simulation Algorithm Parameters ========================== + 'num_patches' : 1, # Two patches + 'model_eqns' : 2, # Number of model equations + 
'alt_soundspeed' : 'F', + 'num_fluids' : 2, + 'low_Mach' : 0, + 'mpp_lim' : 'F', + # ' mixture_err' : 'F', + 'time_stepper' : 3, + 'weno_order' : 5, + 'weno_eps' : 1.E-16, + 'weno_Re_flux' : 'F', + 'weno_avg' : 'F', + 'mapped_weno' : 'F', + 'null_weights' : 'F', + 'mp_weno' : 'F', + 'riemann_solver' : 1, + 'wave_speeds' : 1, + 'avg_state' : 2, + 'bc_x%beg' : -3, + 'bc_x%end' : -3, + 'bc_y%beg' : -3, # Boundary conditions for y direction + 'bc_y%end' : -3, + 'num_bc_patches' : 1, + 'patch_bc(1)%type' : -17, + 'patch_bc(1)%dir' : 1, + 'patch_bc(1)%loc' : -1, + 'patch_bc(1)%geometry' : 1, + 'patch_bc(1)%centroid(1)' : 0, + 'patch_bc(1)%centroid(2)' : 0.5, + 'patch_bc(1)%length(2)' : 0.26, + 'patch_bc(1)%vel(1)' : 10, + 'patch_bc(1)%vel(2)' : 0, + # ========================================================== + + # Turning on IB ================================ + 'ib' : 'T', + 'num_ibs' : 2, + + # ========================================================== + + # Formatted Database Files Structure Parameters ============ + 'format' : 1, + 'precision' : 2, + 'prim_vars_wrt' :'T', + 'parallel_io' :'T', + # ========================================================== + + # Patch 1 (background flow) =================== + 'patch_icpp(1)%geometry' : 3, # 2D geometry + 'patch_icpp(1)%x_centroid' : 1.0, # x-center + 'patch_icpp(1)%y_centroid' : 0.5, # y-center + 'patch_icpp(1)%length_x' : 2.0, # x-length + 'patch_icpp(1)%length_y' : 1.0, # y-length + 'patch_icpp(1)%vel(1)' : 0.0, + 'patch_icpp(1)%vel(2)' : 0.0, # y-velocity '100*sin(3*x*pi)' + 'patch_icpp(1)%pres' : 1.E5, # Pressure + 'patch_icpp(1)%alpha_rho(1)' : 1000, # Density + 'patch_icpp(1)%alpha_rho(2)' : 0., + 'patch_icpp(1)%alpha(1)' : 1, + 'patch_icpp(1)%alpha(2)' : 0., + 'patch_icpp(1)%tau_e(1)' : 0.0, + + + # ========================================================== + + # Patch 2 (hypo material in the center) ================ + 'patch_ib(1)%geometry' : 3, # 2D geometry + # 'patch_ib(1)%hcid' : 201, + 'patch_ib(1)%x_centroid' : 0.5, # x-center + 'patch_ib(1)%y_centroid' : 0.65, # y-center + 'patch_ib(1)%length_x' : 1.0, # x-length + 'patch_ib(1)%length_y' : 0.04, # y-length + 'patch_ib(1)%slip' : 'T', + + # ========================================================== + + # Patch 3 (hypo material in the center) ================ + 'patch_ib(2)%geometry' : 3, # 2D geometry + # 'patch_ib(1)%hcid' : 201, + 'patch_ib(2)%x_centroid' : 0.5, # x-center + 'patch_ib(2)%y_centroid' : 0.35, # y-center + 'patch_ib(2)%length_x' : 1.0, # x-length + 'patch_ib(2)%length_y' : 0.04, # y-length + 'patch_ib(2)%slip' : 'T', + + # Fluids Physical Parameters =============================== + 'fluid_pp(1)%gamma' : 1.E+00/(6.12E+00-1.E+00), + 'fluid_pp(1)%pi_inf' : 6.12E+00*3.43E+08/(6.12E+00 - 1.E+00), + # 'fluid_pp(1)%G' : 0, + 'fluid_pp(2)%gamma' : 1.E+00/(1.3E+00-1.E+00), + 'fluid_pp(2)%pi_inf' : 1.3E+00*2.E+08/(1.3E+00 - 1.E+00), + # 'fluid_pp(2)%G' : 2.7E+05/(2.E+00*(1.E+00 + 0.4E+00)), + 'fluid_pp(2)%G' : 1.E7, + # ========================================================== +})) diff --git a/src/common/include/macros.fpp b/src/common/include/macros.fpp index c1f447a26..055ba3a5f 100644 --- a/src/common/include/macros.fpp +++ b/src/common/include/macros.fpp @@ -102,3 +102,161 @@ //${message or '"No error description."'}$) end if #:enddef + +#:def MAKE_LOOPS(impl, loops, gpu = None) + #:if gpu != False + !$acc parallel loop collapse(${len(loops)}$) gang vector default(present) + #:endif + #:for index, lbound, hbound in (loops or []) + do ${index}$ = ${lbound}$, 
${hbound}$ + #:endfor + + $:impl + + #:for i in range(len(loops or [])) + end do + #:endfor + #:enddef + + #:def ITERATE_OVER_BUFFER_REGION_SIDED(impl, dir = None, loc = None, thickness = None, gpu = None, outer_loops = None, inner_loops = None, pack_v_size = None) + #:set thickness = thickness or 'buff_size' + #:set outer_loops = outer_loops or [] + #:set inner_loops = inner_loops or [] + #:set pack_v_size = pack_v_size or 'v_size' + + if (${dir}$ <= num_dims) then + + if ((${dir}$) == +1 .and. (${loc}$) == -1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("l", 0, "p"), ("k", 0, "n"), ("j", 1, thickness)] + inner_loops, gpu=gpu) + x = -j; y = k; z = l ! Regular + sx = j - 1; sy = k; sz = l ! Symmetry + px = m - (j - 1); py = k; pz = l ! Periodic + ex = 0; ey = k; ez = l ! Extrapolation + locx = -1; locy = 0; locz = 0 + + exlhs = ex; eylhs = ey; ezlhs = ez + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((j - 1) + ${thickness}$*(k + (n + 1)*l)) +#endif + + $:impl + #:endblock + + else if ((${dir}$) == +1 .and. (${loc}$) == +1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("l", 0, "p"), ("k", 0, "n"), ("j", 1, thickness)] + inner_loops, gpu=gpu) + x = m + j; y = k; z = l ! Regular + sx = m - (j - 1); sy = k; sz = l ! Symmetry + px = j - 1; py = k; pz = l ! Periodic + ex = m; ey = k; ez = l ! Extrapolation + locx = +1; locy = 0; locz = 0 + + exlhs = 0; eylhs = ey; ezlhs = ez + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((j - 1) + ${thickness}$*(k + (n + 1)*l)) +#endif + + $:impl + #:endblock + + else if ((${dir}$) == +2 .and. (${loc}$) == -1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("k", 0, "p"), ("j", 1, thickness), ("l", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu) + x = l; y = -j; z = k ! Regular + sx = l; sy = j - 1; sz = k ! Symmetry + px = l; py = n - (j - 1); pz = k ! Periodic + ex = l; ey = 0; ez = k ! Extrapolation + locx = 0; locy = -1; locz = 0 + + exlhs = ex; eylhs = ey; ezlhs = ez + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((l + ${thickness}$) + (m + 2*${thickness}$+1)*(j - 1 + ${thickness}$*k)) +#endif + + $:impl + #:endblock + + else if ((${dir}$) == +2 .and. (${loc}$) == +1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("k", 0, "p"), ("j", 1, thickness), ("l", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu) + x = l; y = n + j; z = k ! Regular + sx = l; sy = n - (j - 1); sz = k ! Symmetry + px = l; py = j - 1; pz = k ! Periodic + ex = l; ey = n; ez = k ! Extrapolation + locx = 0; locy = +1; locz = 0 + + exlhs = ex; eylhs = 0; ezlhs = ez + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((l + ${thickness}$) + (m + 2*${thickness}$+1)*(j - 1 + ${thickness}$*k)) +#endif + + $:impl + #:endblock + + else if ((${dir}$) == +3 .and. (${loc}$) == -1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("j", 1, thickness), ("l", f"-({thickness})", f"n + ({thickness})"), ("k", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu) + x = k; y = l; z = -j ! Regular + sx = k; sy = l; sz = j - 1 ! Symmetry + px = k; py = l; pz = p - (j - 1) ! Periodic + ex = k; ey = l; ez = 0 ! 
Extrapolation + locx = 0; locy = 0; locz = -1 + + exlhs = ex; eylhs = ey; ezlhs = ez + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((k + ${thickness}$) + (m + 2*${thickness}$+1)*((l + ${thickness}$) + (n + 2*${thickness}$+1)*(j - 1))) +#endif + + $:impl + #:endblock + + else if ((${dir}$) == +3 .and. (${loc}$) == +1) then + + #:block MAKE_LOOPS(loops = outer_loops + [("j", 1, thickness), ("l", f"-({thickness})", f"n + ({thickness})"), ("k", f"-({thickness})", f"m + ({thickness})")] + inner_loops, gpu=gpu) + x = k; y = l; z = p + j ! Regular + sx = k; sy = l; sz = p - (j - 1) ! Symmetry + px = k; py = l; pz = j - 1 ! Periodic + ex = k; ey = l; ez = p ! Extrapolation + locx = 0; locy = 0; locz = +1 + + exlhs = ex; eylhs = ey; ezlhs = 0 + exlhs = f_clamp(exlhs, 0, m); eylhs = f_clamp(eylhs, 0, n); ezlhs = f_clamp(ezlhs, 0, p) + +#ifndef MFC_PRE_PROCESS + pack_idr = (i - 1) + ${pack_v_size}$*((k + ${thickness}$) + (m + 2*${thickness}$+1)*((l + ${thickness}$) + (n + 2*${thickness}$+1)*(j - 1))) +#endif + + $:impl + #:endblock + + else + + stop "Invalid boundary condition direction or location." + + end if + + end if + #:enddef ITERATE_OVER_BUFFER_REGION_SIDED + + #:def BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + integer :: i, j, k, l, q + + integer :: x, y, z + integer :: sx, sy, sz + integer :: px, py, pz + integer :: ex, ey, ez + integer :: locx, locy, locz + integer :: exlhs, eylhs, ezlhs + integer :: pack_idr + #:enddef diff --git a/src/common/m_boundary_conditions_common.fpp b/src/common/m_boundary_conditions_common.fpp new file mode 100644 index 000000000..bd17a661b --- /dev/null +++ b/src/common/m_boundary_conditions_common.fpp @@ -0,0 +1,484 @@ +#:include 'macros.fpp' + +module m_boundary_conditions_common + + use m_global_parameters + + use m_mpi_proxy + + use m_helper + + use m_compile_specific + + implicit none + + private; public :: s_initialize_boundary_conditions_module, & + s_generate_boundary_condition_patch_buffers, & + bc_id_sfs, bc_id_has_bc, & + s_read_boundary_condition_files, & + s_write_boundary_condition_files + + ! Boundary condition structure. bc_id_sfs(, )%sf(, , ) + ! holds the necessary boundary condition information for a point along the + ! surface in the direction on the side , to apply said condition. + type(t_bc_id_sf), dimension(1:3, -1:1) :: bc_id_sfs + +#ifdef MFC_MPI + integer :: mpi_bc_id_type + integer, dimension(1:3, -1:1) :: mpi_bc_sf_type +#endif + + ! Optimization structure. Holds whether a boundary condition can be found + ! on a surface. bc_id_has_bc(, , 0) is true if MPI communication + ! is required for at least one point on the surface. In general, + ! bc_id_has_bc(, , ) is true if a boundary condition with type + ! can be found on the surface on the side of the direction . 
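+    ! The three indices are the direction (1:3), the side (-1:+1), and the
+    ! boundary condition type (-num_bcs_max:0).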
+ logical, dimension(1:3, -1:1, -num_bcs_max:0) :: bc_id_has_bc + +!$acc declare create(bc_id_sfs) + +#ifndef MFC_PRE_PROCESS + public :: s_populate_prim_buffers, s_populate_cons_buffers +#endif + +contains + + subroutine s_initialize_boundary_conditions_module() + + integer :: iter_loc + + do iter_loc = -1, 2, 2 + @:ALLOCATE(bc_id_sfs(1, iter_loc)%sf(0:0, 0:n, 0:p)) + if (num_dims > 1) then + @:ALLOCATE(bc_id_sfs(2, iter_loc)%sf(0:m, 0:0, 0:p)) + + if (num_dims > 2) then + @:ALLOCATE(bc_id_sfs(3, iter_loc)%sf(0:m, 0:n, 0:0)) + end if + end if + end do + +#ifdef MFC_MPI + call s_create_mpi_types() +#endif + + end subroutine s_initialize_boundary_conditions_module + +#ifdef MFC_MPI + subroutine s_create_mpi_types() + + use mpi + + integer :: blocklengths(2) = (/1, 3/) + integer(KIND=MPI_ADDRESS_KIND) :: displacements(2) = (/0, 8/) ! Offset between fields in memory + integer :: types(2) = (/MPI_INTEGER, MPI_DOUBLE_PRECISION/) + integer, dimension(1:3) :: sf_extents_loc, sf_start_idx + integer :: ierr + + integer :: iter_dir, iter_loc + + call MPI_Type_create_struct(2, blocklengths, displacements, types, mpi_bc_id_type, ierr) + call MPI_Type_commit(mpi_bc_id_type, ierr) + + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + + sf_start_idx = (/0, 0, 0/) + sf_extents_loc = shape(bc_id_sfs(iter_dir, iter_loc)%sf) + + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc, sf_extents_loc, sf_start_idx, & + MPI_ORDER_FORTRAN, mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), ierr) + call MPI_TYPE_COMMIT(mpi_bc_sf_type(iter_dir, iter_loc), ierr) + + end do + end do + + end subroutine s_create_mpi_types +#endif + + subroutine s_write_boundary_condition_files(step_dirpath) + + character(LEN=*), intent(in) :: step_dirpath + + integer :: iter_dir, iter_loc + character(len=path_len) :: file_path + + character(len=10) :: status + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str +#endif + +#ifdef MFC_PRE_PROCESS + if (old_grid) then + status = 'old' + else + status = 'new' + end if +#else + status = 'unknown' +#endif + + if (parallel_io .eqv. .false.) then + file_path = trim(step_dirpath)//'/bc.dat' + open (1, FILE=trim(file_path), FORM='unformatted', STATUS=status) + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + write (1) bc_id_sfs(iter_dir, iter_loc)%sf + end do + end do + close (1) + else +#ifdef MFC_MPI + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(step_dirpath)//'/bc_'//trim(proc_rank_str)//'.dat' + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_write_all(file_id, bc_id_sfs(iter_dir, iter_loc)%sf, 1, mpi_bc_sf_type(iter_dir, iter_loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_id_sfs(iter_dir, iter_loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + end if + + end subroutine s_write_boundary_condition_files + + subroutine s_read_boundary_condition_files(step_dirpath) + + character(LEN=*), intent(in) :: step_dirpath + + integer :: iter_dir, iter_loc + logical :: file_exist + character(len=path_len) :: file_path + +#ifdef MFC_MPI + integer :: ierr + integer :: file_id + integer :: offset + character(len=7) :: proc_rank_str +#endif + + if (parallel_io .eqv. .false.) 
then + file_path = trim(step_dirpath)//'/bc.dat' + else +#ifdef MFC_MPI + write (proc_rank_str, '(I7.7)') proc_rank + file_path = trim(step_dirpath)//'/bc_'//trim(proc_rank_str)//'.dat' +#endif + end if + + inquire (FILE=trim(file_path), EXIST=file_exist) + if (.not. file_exist) then + call s_mpi_abort(trim(file_path)//' is missing. Exiting ...') + end if + + if (parallel_io .eqv. .false.) then + open (1, FILE=trim(file_path), FORM='unformatted', STATUS='unknown') + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + read (1) bc_id_sfs(iter_dir, iter_loc)%sf + end do + end do + close (1) + else +#ifdef MFC_MPI + call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr) + + offset = 0 + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + call MPI_File_set_view(file_id, int(offset, KIND=MPI_ADDRESS_KIND), mpi_bc_id_type, mpi_bc_sf_type(iter_dir, iter_loc), 'native', MPI_INFO_NULL, ierr) + call MPI_File_read_all(file_id, bc_id_sfs(iter_dir, iter_loc)%sf, 1, mpi_bc_sf_type(iter_dir, iter_loc), MPI_STATUS_IGNORE, ierr) + offset = offset + sizeof(bc_id_sfs(iter_dir, iter_loc)%sf) + end do + end do + + call MPI_File_close(file_id, ierr) +#endif + end if + + call s_generate_boundary_condition_lookup_buffers() + + end subroutine s_read_boundary_condition_files + + subroutine s_generate_boundary_condition_patch_buffers() + + type(bc_patch_parameters) :: bc + integer :: iter_dir, iter_loc + real(kind(0d0)) :: radius2 + + type(int_bounds_info), dimension(1:3) :: user_input_bcs + + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + + user_input_bcs = [bc_x, bc_y, bc_z] + + do iter_dir = 1, num_dims + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="-1", thickness=1, gpu=False) + bc_id_sfs(iter_dir, -1)%sf(exlhs, eylhs, ezlhs)%type = user_input_bcs(iter_dir)%beg + bc_id_sfs(iter_dir, -1)%sf(exlhs, eylhs, ezlhs)%vel = user_input_bcs(iter_dir)%vel_beg + #:endblock + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="+1", thickness=1, gpu=False) + bc_id_sfs(iter_dir, +1)%sf(exlhs, eylhs, ezlhs)%type = user_input_bcs(iter_dir)%end + bc_id_sfs(iter_dir, +1)%sf(exlhs, eylhs, ezlhs)%vel = user_input_bcs(iter_dir)%vel_end + #:endblock + end do + + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False) + do i = 1, num_bc_patches + bc = patch_bc(i) + if (bc%geometry == 1) then ! Cuboid + #:for dir, name in [(1, "x"), (2, "y"), (3, "z")] + if (${dir}$ /= bc%dir .and. ${dir}$ <= num_dims) then + if (${name}$_cc(e${name}$lhs) >= bc%centroid(${dir}$) + 0.5d0*bc%length(${dir}$)) then + cycle + end if + if (${name}$_cc(e${name}$lhs) <= bc%centroid(${dir}$) - 0.5d0*bc%length(${dir}$)) then + cycle + end if + end if + #:endfor + elseif (bc%geometry == 2) then ! Spheroid + radius2 = 0d0 + #:for dir, name in [(1, "x"), (2, "y"), (3, "z")] + if (${dir}$ /= bc%dir .and. ${dir}$ <= num_dims) then + radius2 = radius2 + (${name}$_cc(e${name}$lhs) - bc%centroid(${dir}$))**2 + end if + #:endfor + if (radius2 > bc%radius**2) then + cycle + end if + end if + + bc_id_sfs(bc%dir, bc%loc)%sf(exlhs, eylhs, ezlhs)%type = bc%type + bc_id_sfs(bc%dir, bc%loc)%sf(exlhs, eylhs, ezlhs)%vel = bc%vel + end do + #:endblock + end do + end do + + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False) + if (proc_nums(iter_dir) > 1 .and. 
( & + bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type == -1 & + .or. (iter_loc == -1 .and. proc_coords(iter_dir) > 0) & + .or. (iter_loc == +1 .and. proc_coords(iter_dir) < proc_nums(iter_dir) - 1) & + )) then + bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type = neighbor_procs(iter_dir, iter_loc) + end if + #:endblock + + !$acc update device(bc_id_sfs(iter_dir, iter_loc)%sf) + end do + end do + + call s_generate_boundary_condition_lookup_buffers() + + end subroutine s_generate_boundary_condition_patch_buffers + + subroutine s_generate_boundary_condition_lookup_buffers + + integer :: iter_dir, iter_loc + + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + bc_id_has_bc(iter_dir, iter_loc, :) = .false. + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", thickness=1, gpu=False) + bc_id_has_bc(iter_dir, iter_loc, min(0, bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type)) = .true. + #:endblock + end do + end do + + end subroutine s_generate_boundary_condition_lookup_buffers + +#ifndef MFC_PRE_PROCESS + !> The purpose of this procedure is to populate the buffers + !! of the primitive variables, depending on the selected + !! boundary conditions. + subroutine s_populate_prim_buffers(q_prim_vf & +#ifdef MFC_SIMULATION + , pb, mv & +#endif + ) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf +#ifdef MFC_SIMULATION + real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv +#endif + + integer :: iter_dir, iter_loc + + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + + do iter_dir = 1, num_dims + do iter_loc = -1, 1, 2 + if (any(bc_id_has_bc(iter_dir, iter_loc, :-1))) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc") + select case (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type) + case (-13:-3); ! Ghost-cell extrap. + !$acc loop seq + do i = 1, sys_size + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez) + end do + case (-2); ! Symmetry + !$acc loop seq + do i = 1, momxb + iter_dir - 2 + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(sx, sy, sz) + end do + + q_prim_vf(momxb + iter_dir - 1)%sf(x, y, z) = & + -q_prim_vf(momxb + iter_dir - 1)%sf(sx, sy, sz) + + !$acc loop seq + do i = momxb + iter_dir, sys_size + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(sx, sy, sz) + end do + case (-1); ! Periodic + !$acc loop seq + do i = 1, sys_size + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(px, py, pz) + end do + case (-15); ! Splip Wall + !$acc loop seq + do i = 1, sys_size + if (i == momxb + iter_dir - 1) then + q_prim_vf(i)%sf(x, y, z) = & + -q_prim_vf(i)%sf(sx, sy, sz) + 2d0*bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(iter_dir) + else + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez) + end if + end do + case (-16); ! No Slip Wall + !$acc loop seq + do i = 1, sys_size + if (i >= momxb .and. i <= momxe) then + q_prim_vf(i)%sf(x, y, z) = & + -q_prim_vf(i)%sf(sx, sy, sz) + 2d0*bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(i - momxb + 1) + else + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez) + end if + end do + case (-17); ! Velocity Inlet + !$acc loop seq + do i = 1, sys_size + if (i >= momxb .and. i <= momxe) then + q_prim_vf(i)%sf(x, y, z) = & + bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%vel(i - momxb + 1) + else + q_prim_vf(i)%sf(x, y, z) = q_prim_vf(i)%sf(ex, ey, ez) + end if + end do + case (-14); ! 
Axis + if (z_cc(k) < pi) then + !$acc loop seq + do i = 1, momxb + q_prim_vf(i)%sf(x, y, z) = & + q_prim_vf(i)%sf(sx, sy, z + ((p + 1)/2)) + end do + + q_prim_vf(momxb + 1)%sf(x, y, z) = & + -q_prim_vf(momxb + 1)%sf(sx, sy, z + ((p + 1)/2)) + + q_prim_vf(momxe)%sf(x, y, z) = & + -q_prim_vf(momxe)%sf(sx, sy, z + ((p + 1)/2)) + + !$acc loop seq + do i = E_idx, sys_size + q_prim_vf(i)%sf(x, y, z) = & + q_prim_vf(i)%sf(sx, sy, z + ((p + 1)/2)) + end do + else + !$acc loop seq + do i = 1, momxb + q_prim_vf(i)%sf(x, y, z) = & + q_prim_vf(i)%sf(sx, sy, z - ((p + 1)/2)) + end do + + q_prim_vf(momxb + 1)%sf(x, y, z) = & + -q_prim_vf(momxb + 1)%sf(sx, sy, z - ((p + 1)/2)) + + q_prim_vf(momxe)%sf(x, y, z) = & + -q_prim_vf(momxe)%sf(sx, sy, z - ((p + 1)/2)) + + !$acc loop seq + do i = E_idx, sys_size + q_prim_vf(i)%sf(x, y, z) = & + q_prim_vf(i)%sf(sx, sy, z - ((p + 1)/2)) + end do + end if + + end select + #:endblock + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. polytropic) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", outer_loops=[("i", 1, "nb"), ("q", 1, "nnode")]) + select case (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type) + case (-13:-3); ! Ghost-cell extrap. + pb(x, y, z, q, i) = pb(ex, ey, ez, q, i) + mv(x, y, z, q, i) = mv(ex, ey, ez, q, i) + case (-2); ! Symmetry + pb(x, y, z, q, i) = pb(sx, sy, sz, q, i) + mv(x, y, z, q, i) = mv(sx, sy, sz, q, i) + case (-1); ! Periodic + pb(x, y, z, q, i) = pb(px, py, pz, q, i) + mv(x, y, z, q, i) = mv(px, py, pz, q, i) + case (-14); ! Axis + pb(x, y, z, q, i) = pb(sx, sy, z - ((p + 1)/2), q, i) + mv(x, y, z, q, i) = mv(sx, sy, z - ((p + 1)/2), q, i) + end select + #:endblock + end if +#endif + end if + + call s_mpi_sendrecv_variables_buffers(q_prim_vf, bc_id_sfs, & +#ifdef MFC_SIMULATION + pb, mv, & +#endif + iter_dir, iter_loc, & + bc_id_has_bc) + + end do + end do + + end subroutine s_populate_prim_buffers + + !> The purpose of this procedure is to populate the buffers + !! of the conservative variables, depending on the selected + !! boundary conditions. + subroutine s_populate_cons_buffers(q_cons_vf & +#ifdef MFC_SIMULATION + , pb, mv & +#endif + ) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf +#ifdef MFC_SIMULATION + real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv +#endif + + call s_populate_prim_buffers(q_cons_vf & +#ifdef MFC_SIMULATION + , pb, mv & +#endif + ) + + end subroutine s_populate_cons_buffers +#endif + +end module m_boundary_conditions_common diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index 3623fabba..0d232ffc4 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -222,7 +222,7 @@ contains #:endif if (.not. skip_check) then - @:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. (bc_${X}$%${BOUND}$ > -1 .or. bc_${X}$%${BOUND}$ < -16), & + @:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. (bc_${X}$%${BOUND}$ > -1 .or. bc_${X}$%${BOUND}$ < -17), & "bc_${X}$%${BOUND}$ must be between -1 and -16") @:PROHIBIT(bc_${X}$%${BOUND}$ /= dflt_int .and. bc_${X}$%${BOUND}$ == -14, & @@ -320,35 +320,23 @@ contains !> Checks constraints on the inputs for moving boundaries. !! 
Called by s_check_inputs_common for all three stages subroutine s_check_inputs_moving_bc - #:for X, VB2, VB3 in [('x', 'vb2', 'vb3'), ('y', 'vb3', 'vb1'), ('z', 'vb1', 'vb2')] - if (any((/bc_${X}$%vb1, bc_${X}$%vb2, bc_${X}$%vb3/) /= 0d0)) then - if (bc_${X}$%beg == -15) then - if (any((/bc_${X}$%${VB2}$, bc_${X}$%${VB3}$/) /= 0d0)) then - call s_mpi_abort("bc_${X}$%beg must be -15 if "// & - "bc_${X}$%${VB2}$ or bc_${X}$%${VB3}$ "// & - "is set. Exiting ...") - end if - elseif (bc_${X}$%beg /= -16) then - call s_mpi_abort("bc_${X}$%beg must be -15 or -16 if "// & - "bc_${X}$%vb[1,2,3] is set. Exiting ...") - end if - end if - #:endfor - #:for X, VE2, VE3 in [('x', 've2', 've3'), ('y', 've3', 've1'), ('z', 've1', 've2')] - if (any((/bc_${X}$%ve1, bc_${X}$%ve2, bc_${X}$%ve3/) /= 0d0)) then - if (bc_${X}$%end == -15) then - if (any((/bc_${X}$%${VE2}$, bc_${X}$%${VE3}$/) /= 0d0)) then - call s_mpi_abort("bc_${X}$%end must be -15 if "// & - "bc_${X}$%${VE2}$ or bc_${X}$%${VE3}$ "// & - "is set. Exiting ...") + #:for DIR in ['x', 'y', 'z'] + #:for LOC in ['beg', 'end'] + if (any(bc_${DIR}$%vel_${LOC}$ /= 0d0)) then + if (bc_${DIR}$%${LOC}$ == -15) then + if (any((/bc_${DIR}$%vel_${LOC}$ (2), bc_${DIR}$%vel_${LOC}$ (3)/) /= 0d0)) then + call s_mpi_abort("bc_${DIR}$%${LOC}$ must be -15 if "// & + "bc_${DIR}$%vel_${LOC}$[2,3] is set. Exiting ...") + end if + elseif (bc_${DIR}$%${LOC}$ /= -16) then + call s_mpi_abort("bc_${DIR}$%${LOC}$ must be -15 or -16 if "// & + "bc_${DIR}$%vel_${LOC}$ is set. Exiting ...") end if - elseif (bc_${X}$%end /= -16) then - call s_mpi_abort("bc_${X}$%end must be -15 or -16 if "// & - "bc_${X}$%ve[1,2,3] is set. Exiting ...") end if - end if + #:endfor #:endfor + end subroutine s_check_inputs_moving_bc end module m_checker_common diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 8bc120bef..a1435b4bf 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -20,8 +20,12 @@ module m_constants integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation integer, parameter :: num_patches_max = 10 + integer, parameter :: num_bc_patches_max = 20 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes + integer, parameter :: gp_layers = 3 !< Number of ghost point layers + integer, parameter :: num_bcs_max = 100 !< Maximum number of boundary condition types. 
+ real(kind(0d0)), parameter :: capillary_cutoff = 1e-6 !< color function gradient magnitude at which to apply the surface tension fluxes real(kind(0d0)), parameter :: acoustic_spatial_support_width = 2.5d0 !< Spatial support width of acoustic source, used in s_source_spatial real(kind(0d0)), parameter :: dflt_vcfl_dt = 100d0 !< value of vcfl_dt when viscosity is off for computing adaptive timestep size diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index b94ec89b1..b43a72824 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -73,12 +73,8 @@ module m_derived_types type int_bounds_info integer :: beg integer :: end - real(kind(0d0)) :: vb1 - real(kind(0d0)) :: vb2 - real(kind(0d0)) :: vb3 - real(kind(0d0)) :: ve1 - real(kind(0d0)) :: ve2 - real(kind(0d0)) :: ve3 + real(kind(0d0)) :: vel_beg(1:3) + real(kind(0d0)) :: vel_end(1:3) real(kind(0d0)) :: pres_in, pres_out real(kind(0d0)), dimension(3) :: vel_in, vel_out real(kind(0d0)), dimension(num_fluids_max) :: alpha_rho_in, alpha_in @@ -320,7 +316,7 @@ module m_derived_types !> Ghost Point for Immersed Boundaries type ghost_point - real(kind(0d0)), dimension(3) :: loc !< Physical location of the ghost point + integer, dimension(3) :: loc !< Physical location of the ghost point real(kind(0d0)), dimension(3) :: ip_loc !< Physical location of the image point integer, dimension(3) :: ip_grid !< Top left grid point of IP real(kind(0d0)), dimension(2, 2, 2) :: interp_coeffs !< Interpolation Coefficients of image point @@ -348,4 +344,27 @@ module m_derived_types integer :: gamma_method end type chemistry_parameters + type bc_patch_parameters + ! User inputs + integer :: type + integer :: dir ! [x,y,z] => [1,2,3] + integer :: loc ! [beg,end] => [-1,+1] + integer :: geometry + + real(kind(0d0)) :: centroid(1:3) + real(kind(0d0)) :: length(1:3) + real(kind(0d0)) :: radius + + real(kind(0d0)) :: vel(1:3) + end type bc_patch_parameters + + type t_bc_id + integer :: type + real(kind(0d0)) :: vel(1:3) + end type t_bc_id + + type t_bc_id_sf + type(t_bc_id), dimension(:, :, :), allocatable :: sf + end type t_bc_id_sf + end module m_derived_types diff --git a/src/common/m_global_parameters_common.fpp b/src/common/m_global_parameters_common.fpp new file mode 100644 index 000000000..9164935d3 --- /dev/null +++ b/src/common/m_global_parameters_common.fpp @@ -0,0 +1,27 @@ +module m_global_parameters_common + + use m_constants + use m_derived_types + + implicit none + +contains + + subroutine s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc) + + integer, intent(inout) :: num_bc_patches + type(bc_patch_parameters), intent(inout) :: patch_bc(num_bc_patches_max) + + integer :: i + + num_bc_patches = 0 + do i = 1, num_bc_patches_max + patch_bc(i)%type = dflt_int + patch_bc(i)%geometry = dflt_int + patch_bc(i)%dir = dflt_int + patch_bc(i)%loc = dflt_int + end do + + end subroutine s_bc_assign_default_values_to_user_inputs + +end module m_global_parameters_common diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index dce271120..83023ec4c 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -11,8 +11,6 @@ module m_helper use m_global_parameters !< Definitions of the global parameters - use m_mpi_common !< MPI modules - use ieee_arithmetic !< For checking NaN ! 
========================================================================== @@ -36,7 +34,7 @@ module m_helper s_print_2D_array, & f_xor, & f_logical_to_int, & - s_prohibit_abort + f_clamp contains @@ -69,9 +67,6 @@ contains nR3 = dot_product(weights, nRtmp**3.d0) ntmp = DSQRT((4.d0*pi/3.d0)*nR3/vftmp) - !ntmp = (3.d0/(4.d0*pi))*0.00001 - - !print *, "nbub", ntmp end subroutine s_comp_n_from_cons @@ -466,20 +461,15 @@ contains end if end function f_logical_to_int - subroutine s_prohibit_abort(condition, message) - character(len=*), intent(in) :: condition, message + function f_clamp(x, xmin, xmax) result(y) - print *, "" - print *, "====================================================================================================" - print *, " CASE FILE ERROR " - print *, "----------------------------------------------------------------------------------------------------" - print *, "Prohibited condition: ", trim(condition) - if (len_trim(message) > 0) then - print *, "Note: ", trim(message) - end if - print *, "====================================================================================================" - print *, "" - call s_mpi_abort - end subroutine s_prohibit_abort + !$acc routine seq + + integer, intent(in) :: x, xmin, xmax + integer :: y + + y = max(xmin, min(x, xmax)) + + end function f_clamp end module m_helper diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 403ac4f13..fc523922a 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -13,9 +13,14 @@ module m_mpi_common use mpi !< Message passing interface (MPI) module #endif + use m_nvtx + + use m_helper + use m_derived_types !< Definitions of the derived types use m_global_parameters !< Definitions of the global parameters + ! ========================================================================== implicit none @@ -25,6 +30,27 @@ module m_mpi_common integer, private :: ierr !> @} +#ifndef MFC_PRE_PROCESS + real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_send !< + !! This variable is utilized to pack and send the buffer of the cell-average + !! conservative variables, for a single computational domain boundary at the + !! time, to the relevant neighboring processor. + + real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_recv !< + !! q_cons_buff_recv is utilized to receive and unpack the buffer of the cell- + !! average conservative variables, for a single computational domain boundary + !! at the time, from the relevant neighboring processor. + + !$acc declare create(q_cons_buff_send, q_cons_buff_recv) + + integer :: v_size + + !$acc declare create(v_size) +#endif + + integer, dimension(3, -1:1) :: neighbor_procs + !$acc declare create(neighbor_procs) + contains !> The subroutine initializes the MPI execution environment @@ -60,6 +86,72 @@ contains end subroutine s_mpi_initialize + subroutine s_initialize_mpi_common_module() + +#if defined(MFC_MPI) && !defined(MFC_PRE_PROCESS) + + ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that + ! for the sake of simplicity, both variables are provided sufficient + ! storage to hold the largest buffer in the computational domain. + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. 
polytropic) then + if (n > 0) then + if (p > 0) then + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1))) + else + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & + & (max(m, n) + 2*buff_size + 1))) + end if + else + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4))) + end if + + @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1))) + + v_size = sys_size + 2*nb*4 + else +#endif + if (n > 0) then + if (p > 0) then + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1))) + else + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* & + & (max(m, n) + 2*buff_size + 1))) + end if + else + @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size)) + end if + + @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1))) + + v_size = sys_size +#if MFC_SIMULATION + end if +#endif + +!$acc update device(v_size) + +#endif + + end subroutine s_initialize_mpi_common_module + + subroutine s_finalize_mpi_common_module() + +#if defined(MFC_MPI) && !defined(MFC_PRE_PROCESS) + @:DEALLOCATE(q_cons_buff_send, q_cons_buff_recv) +#endif + + end subroutine s_finalize_mpi_common_module + !! @param q_cons_vf Conservative variables !! @param ib_markers track if a cell is within the immersed boundary !! @param levelset closest distance from every cell to the IB @@ -475,4 +567,699 @@ contains end subroutine s_mpi_finalize +#ifndef MFC_PRE_PROCESS + !> The goal of this procedure is to populate the buffers of + !! the cell-average conservative variables by communicating + !! with the neighboring processors. + !! @param q_cons_vf Cell-average conservative variables + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & + bc_id_sfs, & +#ifdef MFC_SIMULATION + pb, mv, & +#endif + mpi_dir, & + pbc_loc, & + bc_id_has_bc) + + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs + +#ifdef MFC_SIMULATION + real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv +#endif + integer, intent(in) :: mpi_dir, pbc_loc + + logical, dimension(1:3, -1:1, -num_bcs_max:0), intent(in) :: bc_id_has_bc + + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + + type(bc_patch_parameters) :: bc + + integer :: buffer_counts(1:3) + + real(kind(0d0)), pointer :: p_send, p_recv + + integer :: r + +#ifdef MFC_MPI + +!$acc update device(v_size) + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. 
polytropic) then + buffer_counts = (/ & + buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & + buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + else +#endif + buffer_counts = (/ & + buff_size*sys_size*(n + 1)*(p + 1), & + buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) +#ifdef MFC_SIMULATION + end if +#endif + + if (bc_id_has_bc(mpi_dir, -pbc_loc, 0)) then + call nvtxStartRange("RHS-COMM-PACKBUF") + + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", 1, "sys_size")]) + q_cons_buff_send(pack_idr) = q_cons_vf(i)%sf(sx, sy, sz) + #:endblock + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. polytropic) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", "sys_size + 1", "sys_size + 4"), ("q", 1, "nb")]) + q_cons_buff_send(pack_idr + (q - 1)*4) = pb(sx, sy, sz, i - sys_size, q) + q_cons_buff_send(pack_idr + (q - 1)*4 + nb*4) = mv(sx, sy, sz, i - sys_size, q) + #:endblock + end if +#endif + + call nvtxEndRange ! Packbuf + end if + + p_send => q_cons_buff_send(0) + p_recv => q_cons_buff_recv(0) + + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(q_cons_buff_send) + call nvtxEndRange + + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_send, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, -pbc_loc), 0, & + p_recv, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, +pbc_loc), 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(q_cons_buff_recv) + call nvtxEndRange + #:endif + end if + #:endfor + + if (bc_id_has_bc(mpi_dir, pbc_loc, 0)) then + call nvtxStartRange("RHS-COMM-UNPACKBUF") + + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", 1, "sys_size")]) + if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then + q_cons_vf(i)%sf(x, y, z) = q_cons_buff_recv(pack_idr) + end if + #:endblock + +#ifdef MFC_SIMULATION + if (qbmm .and. .not. polytropic) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", "sys_size + 1", "sys_size + 4"), ("q", 1, "nb")]) + if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then + pb(x, y, z, i - sys_size, q) = q_cons_buff_recv(pack_idr + (q - 1)*4) + mv(x, y, z, i - sys_size, q) = q_cons_buff_recv(pack_idr + (q - 1)*4 + nb*4) + end if + #:endblock + end if +#endif + + call nvtxEndRange + end if +#endif + + end subroutine s_mpi_sendrecv_variables_buffers + + !> The goal of this procedure is to populate the buffers of + !! the grid variables by communicating with the neighboring + !! processors. Note that only the buffers of the cell-width + !! distributions are handled in such a way. This is because + !! the buffers of cell-boundary locations may be calculated + !! directly from those of the cell-width distributions. 
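+    !! @param bc_id_sfs Boundary condition information (type and velocity) for each
+    !!      domain face, indexed by direction (1:3) and side (-1:+1).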
+ subroutine s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs) + + type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs + + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + + type(t_bc_id) :: bc + + integer :: iter_loc + + integer :: send_offset, recv_offset + integer :: extra_cell_count + + #:for cmp, dir_id, extent in zip(['x', 'y', 'z'], [1, 2, 3], ['m', 'n', 'p']) + + if (${dir_id}$ > num_dims) then + return + end if + + do iter_loc = -1, 1, 2 + +#ifdef MFC_MPI + if (iter_loc == -1) then + send_offset = ${extent}$-buff_size + 1 + recv_offset = -buff_size + else + send_offset = 0 + recv_offset = ${extent}$+1 + end if + + call MPI_SENDRECV( & + d${cmp}$ (send_offset), buff_size, & + MPI_DOUBLE_PRECISION, neighbor_procs(${dir_id}$, -iter_loc), 0, & + d${cmp}$ (recv_offset), buff_size, & + MPI_DOUBLE_PRECISION, neighbor_procs(${dir_id}$, +iter_loc), 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! Note: This does a little TOO much work (iterating over too many cells) + ! but this is because dx, dy, dz SHOULD be two dimensional arrays, + ! not one-dimensional arrays. Indeed, the boundary conditions + ! can vary along two axes, not just one if the case is 3D. + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir_id, loc="iter_loc", gpu=False) + bc = bc_id_sfs(${dir_id}$, iter_loc)%sf(exlhs, eylhs, ezlhs) + + if (bc%type <= -3) then + d${cmp}$ (${cmp}$) = d${cmp}$ (e${cmp}$) + elseif (bc%type == -2) then + d${cmp}$ (${cmp}$) = d${cmp}$ (s${cmp}$) + elseif (bc%type == -1) then + d${cmp}$ (${cmp}$) = d${cmp}$ (p${cmp}$) + end if + #:endblock + + end do + +#ifdef MFC_POST_PROCESS + extra_cell_count = offset_${cmp}$%beg +#else + extra_cell_count = buff_size +#endif + + do i = 1, extra_cell_count + ! Computing the cell-boundary locations buffer, at the beginning of + ! the coordinate direction, from the cell-width distribution buffer. + ${cmp}$_cb(-1 - i) = ${cmp}$_cb(-i) - d${cmp}$ (-i) + end do + +#ifdef MFC_POST_PROCESS + extra_cell_count = offset_${cmp}$%end +#else + extra_cell_count = buff_size +#endif + + do i = 1, extra_cell_count + ! Populating the cell-boundary locations buffer, at the end of the + ! coordinate direction, from buffer of the cell-width distribution. + ${cmp}$_cb(${extent}$+i) = ${cmp}$_cb(${extent}$+(i - 1)) + d${cmp}$ (${extent}$+i) + end do + + do i = 1, buff_size + ! Computing the cell-center locations buffer, at the beginning of + ! the coordinate direction, from the cell-width distribution buffer. + ${cmp}$_cc(-i) = ${cmp}$_cc(1 - i) - (d${cmp}$ (1 - i) + d${cmp}$ (-i))/2d0 + + ! Populating the cell-center locations buffer, at the end of the + ! coordinate direction, from buffer of the cell-width distribution. + ${cmp}$_cc(${extent}$+i) = ${cmp}$_cc(${extent}$+(i - 1)) + (d${cmp}$ (${extent}$+(i - 1)) + d${cmp}$ (${extent}$+i))/2d0 + end do + #:endfor + + end subroutine s_mpi_sendrecv_grid_spacing_buffers +#endif + + !> The purpose of this procedure is to optimally decompose + !! the computational domain among the available processors. + !! This is performed by attempting to award each processor, + !! in each of the coordinate directions, approximately the + !! same number of cells, and then recomputing the affected + !! global parameters. + subroutine s_mpi_decompose_computational_domain + +#ifdef MFC_MPI + + real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< + !! Non-optimal number of processors in the x-, y- and z-directions + + real(kind(0d0)) :: fct_min !< + !! 
Processor factorization (fct) minimization parameter + + integer :: MPI_COMM_CART !< + !! Cartesian processor topology communicator + + integer, dimension(1:3) :: rem_cells !< + !! Remaining number of cells, in a particular coordinate direction, + !! after the majority is divided up among the available processors + + integer :: i, j !< Generic loop iterators + + integer :: iter_dir + +#endif + + proc_coords(:) = 0 + proc_nums(:) = 1 + +#ifdef MFC_MPI + + if (num_procs == 1 .and. parallel_io) then + do i = 1, num_dims + start_idx(i) = 0 + end do + return + end if + + ! 3D Cartesian Processor Topology ================================== + if (n > 0) then + + if (p > 0) then + + if (cyl_coord .and. p > 0) then + ! Implement pencil processor blocking if using cylindrical coordinates so + ! that all cells in azimuthal direction are stored on a single processor. + ! This is necessary for efficient application of Fourier filter near axis. + + ! Initial values of the processor factorization optimization + proc_nums(1) = 1 + proc_nums(2) = num_procs + proc_nums(3) = 1 + ierr = -1 + + ! Computing minimization variable for these initial values + tmp_num_procs_x = proc_nums(1) + tmp_num_procs_y = proc_nums(2) + tmp_num_procs_z = proc_nums(3) + fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Searching for optimal computational domain distribution + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + proc_nums(1) = i + proc_nums(2) = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + else + + ! Initial estimate of optimal processor topology + proc_nums(1) = 1 + proc_nums(2) = 1 + proc_nums(3) = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = proc_nums(1) + tmp_num_procs_y = proc_nums(2) + tmp_num_procs_z = proc_nums(3) + fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + 10d0*abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + do j = 1, num_procs/i + + if (mod(num_procs/i, j) == 0 & + .and. & + (n + 1)/j >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = j + tmp_num_procs_z = num_procs/(i*j) + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) & + .and. & + (p + 1)/tmp_num_procs_z & + >= & + num_stcls_min*weno_order) & + then + + proc_nums(1) = i + proc_nums(2) = j + proc_nums(3) = num_procs/(i*j) + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + + abs((n + 1)/tmp_num_procs_y & + - (p + 1)/tmp_num_procs_z) + ierr = 0 + + end if + + end if + + end do + + end if + + end do + + end if + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n, p and '// & + 'weno_order. Exiting ...') + end if + + ! 
Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/proc_nums(1), & + proc_nums(2), proc_nums(3)/), & + (/.true., .true., .true./), & + .false., MPI_COMM_CART, ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & + proc_coords, ierr) + ! END: 3D Cartesian Processor Topology ============================= + + ! Global Parameters for z-direction ================================ + + ! Number of remaining cells + rem_cells(3) = mod(p + 1, proc_nums(3)) + +#ifdef MFC_PRE_PROCESS + ! Preliminary uniform cell-width spacing + if (old_grid .neqv. .true.) then + dz = (z_domain%end - z_domain%beg)/real(p + 1, kind(0d0)) + end if +#endif + + ! Optimal number of cells per processor + p = (p + 1)/proc_nums(3) - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells(3) + if (proc_coords(3) == i - 1) then + p = p + 1; exit + end if + end do + + ! ================================================================== + + ! 2D Cartesian Processor Topology ================================== + else + + ! Initial estimate of optimal processor topology + proc_nums(1) = 1 + proc_nums(2) = num_procs + ierr = -1 + + ! Benchmarking the quality of this initial guess + tmp_num_procs_x = proc_nums(1) + tmp_num_procs_y = proc_nums(2) + fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + + ! Optimization of the initial processor topology + do i = 1, num_procs + + if (mod(num_procs, i) == 0 & + .and. & + (m + 1)/i >= num_stcls_min*weno_order) then + + tmp_num_procs_x = i + tmp_num_procs_y = num_procs/i + + if (fct_min >= abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) & + .and. & + (n + 1)/tmp_num_procs_y & + >= & + num_stcls_min*weno_order) then + + proc_nums(1) = i + proc_nums(2) = num_procs/i + fct_min = abs((m + 1)/tmp_num_procs_x & + - (n + 1)/tmp_num_procs_y) + ierr = 0 + + end if + + end if + + end do + + ! Verifying that a valid decomposition of the computational + ! domain has been established. If not, the simulation exits. + if (proc_rank == 0 .and. ierr == -1) then + call s_mpi_abort('Unsupported combination of values '// & + 'of num_procs, m, n and '// & + 'weno_order. Exiting ...') + end if + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/proc_nums(1), & + proc_nums(2)/), (/.true., & + .true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & + proc_coords, ierr) + + end if + ! END: 2D Cartesian Processor Topology ============================= + + ! Global Parameters for y-direction ================================ + + ! Number of remaining cells + rem_cells(2) = mod(n + 1, proc_nums(2)) + +#ifdef MFC_PRE_PROCESS + ! Preliminary uniform cell-width spacing + if (old_grid .neqv. .true.) then + dy = (y_domain%end - y_domain%beg)/real(n + 1, kind(0d0)) + end if +#endif + + ! Optimal number of cells per processor + n = (n + 1)/proc_nums(2) - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells(2) + if (proc_coords(2) == i - 1) then + n = n + 1; exit + end if + end do + + ! ================================================================== + + ! 1D Cartesian Processor Topology ================================== + else + + ! Optimal processor topology + proc_nums(1) = num_procs + +#ifdef MFC_POST_PROCESS + ! Number of cells in undecomposed computational domain needed + ! 
for sub-domain reassembly during formatted data output + m_root = m +#endif + + ! Creating new communicator using the Cartesian topology + call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/proc_nums(1)/), & + (/.true./), .false., MPI_COMM_CART, & + ierr) + + ! Finding the Cartesian coordinates of the local process + call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & + proc_coords, ierr) + + end if + ! ================================================================== + + ! Global Parameters for x-direction ================================ + + ! Number of remaining cells + rem_cells(1) = mod(m + 1, proc_nums(1)) + +#ifdef MFC_PRE_PROCESS + ! Preliminary uniform cell-width spacing + if (old_grid .neqv. .true.) then + dx = (x_domain%end - x_domain%beg)/real(m + 1, kind(0d0)) + end if +#endif + + ! Optimal number of cells per processor + m = (m + 1)/proc_nums(1) - 1 + + ! Distributing the remaining cells + do i = 1, rem_cells(1) + if (proc_coords(1) == i - 1) then + m = m + 1; exit + end if + end do + ! ================================================================== + + #:for cmp, dir, extent in zip(['z', 'y', 'x'], [3, 2, 1], ['p', 'n', 'm']) + + if (${dir}$ <= num_dims) then + +#ifdef MFC_PRE_PROCESS + ! Beginning and end sub-domain boundary locations + if (parallel_io .neqv. .true.) then + if (old_grid .neqv. .true.) then + if (proc_coords(${dir}$) < rem_cells(${dir}$)) then + ${cmp}$_domain%beg = ${cmp}$_domain%beg + d${cmp}$*real((${extent}$+1)* & + proc_coords(${dir}$)) + ${cmp}$_domain%end = ${cmp}$_domain%end - d${cmp}$*real((${extent}$+1)* & + (proc_nums(${dir}$) - proc_coords(${dir}$) - 1) & + - (proc_nums(${dir}$) - rem_cells(${dir}$))) + else + ${cmp}$_domain%beg = ${cmp}$_domain%beg + d${cmp}$*real((${extent}$+1)* & + proc_coords(${dir}$) + rem_cells(${dir}$)) + ${cmp}$_domain%end = ${cmp}$_domain%end - d${cmp}$*real((${extent}$+1)* & + (proc_nums(${dir}$) - proc_coords(${dir}$) - 1)) + end if + end if + else + if (proc_coords(${dir}$) < rem_cells(${dir}$)) then + start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + else + start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + rem_cells(${dir}$) + end if + end if +#endif + +#ifdef MFC_POST_PROCESS + ! Ghost zone at the beginning + if (proc_coords(${dir}$) > 0 .and. format == 1) then + offset_${cmp}$%beg = 2 + else + offset_${cmp}$%beg = 0 + end if + + ! Ghost zone at the end + if (proc_coords(${dir}$) < proc_nums(${dir}$) - 1 .and. format == 1) then + offset_${cmp}$%end = 2 + else + offset_${cmp}$%end = 0 + end if +#endif + +#ifndef MFC_PRE_PROCESS + if (parallel_io) then + if (proc_coords(${dir}$) < rem_cells(${dir}$)) then + start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + else + start_idx(${dir}$) = (${extent}$+1)*proc_coords(${dir}$) + rem_cells(${dir}$) + end if + end if +#endif + + end if + + #:endfor + + do iter_dir = 1, num_dims + proc_coords(iter_dir) = proc_coords(iter_dir) - 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, neighbor_procs(iter_dir, -1), ierr) + proc_coords(iter_dir) = proc_coords(iter_dir) + 1 + proc_coords(iter_dir) = proc_coords(iter_dir) + 1 + call MPI_CART_RANK(MPI_COMM_CART, proc_coords, neighbor_procs(iter_dir, +1), ierr) + proc_coords(iter_dir) = proc_coords(iter_dir) - 1 + end do + + if (proc_nums(1) > 1) then + if (bc_x%beg == -1 .or. proc_coords(1) > 0) then + bc_x%beg = neighbor_procs(1, -1) + end if + + if (bc_x%end == +1 .or. proc_coords(1) < proc_nums(1) - 1) then + bc_x%end = neighbor_procs(1, +1) + end if + end if + + if (num_dims >= 2 .and. 
proc_nums(2) > 1) then + if (bc_y%beg == -1 .or. proc_coords(2) > 0) then + bc_y%beg = neighbor_procs(2, -1) + end if + + if (bc_y%end == +1 .or. proc_coords(2) < proc_nums(2) - 1) then + bc_y%end = neighbor_procs(2, +1) + end if + end if + + if (num_dims >= 3 .and. proc_nums(3) > 1) then + if (bc_z%beg == -1 .or. proc_coords(3) > 0) then + bc_z%beg = neighbor_procs(3, -1) + end if + + if (bc_z%end == +1 .or. proc_coords(3) < proc_nums(3) - 1) then + bc_z%end = neighbor_procs(3, +1) + end if + end if + +#endif + + end subroutine s_mpi_decompose_computational_domain + + subroutine s_prohibit_abort(condition, message) + character(len=*), intent(in) :: condition, message + + print *, "" + print *, "====================================================================================================" + print *, " CASE FILE ERROR " + print *, "----------------------------------------------------------------------------------------------------" + print *, "Prohibited condition: ", trim(condition) + if (len_trim(message) > 0) then + print *, "Note: ", trim(message) + end if + print *, "====================================================================================================" + print *, "" + call s_mpi_abort + end subroutine s_prohibit_abort + end module m_mpi_common diff --git a/src/simulation/m_nvtx.f90 b/src/common/m_nvtx.f90 similarity index 100% rename from src/simulation/m_nvtx.f90 rename to src/common/m_nvtx.f90 diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index ffc6e02ba..f6fd7e554 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -23,7 +23,7 @@ module m_variables_conversion use m_helper use m_thermochem, only: & - num_species, get_temperature, get_pressure, & + num_species, get_temperature, get_pressure, gas_constant, & get_mixture_molecular_weight, get_mixture_energy_mass ! ========================================================================== @@ -1114,7 +1114,7 @@ contains end do call get_mixture_molecular_weight(Ys, mix_mol_weight) - T = q_prim_vf(T_idx)%sf(j, k, l) + T = q_prim_vf(E_idx)%sf(j, k, l)*mix_mol_weight/(gas_constant*rho) call get_mixture_energy_mass(T, Ys, e_mix) q_cons_vf(E_idx)%sf(j, k, l) = & diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.fpp similarity index 53% rename from src/post_process/m_data_input.f90 rename to src/post_process/m_data_input.fpp index 567816f73..942186587 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.fpp @@ -2,6 +2,8 @@ !! @file m_data_input.f90 !> @brief Contains module m_data_input +#:include 'macros.fpp' + !> @brief This module features procedures, which for a specific time-step, !! read in the raw simulation data for the grid and the conservative !! variables and fill out their buffer regions. @@ -21,6 +23,8 @@ module m_data_input use m_compile_specific use m_helper + + use m_boundary_conditions_common ! ========================================================================== implicit none @@ -29,8 +33,6 @@ module m_data_input s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffer_regions, & - s_populate_conservative_variables_buffer_regions, & s_finalize_data_input_module abstract interface ! =================================================== @@ -89,6 +91,8 @@ subroutine s_read_serial_data_files(t_step) logical :: file_check !< !! 
Generic logical used to test the existence of a particular file + integer :: iter_dir, iter_loc + integer :: i !< Generic loop iterator ! Setting location of time-step folder based on current time-step @@ -226,6 +230,10 @@ subroutine s_read_serial_data_files(t_step) end do + if (t_step == 0) then + call s_read_boundary_condition_files(t_step_dir) + end if + if (ib) then write (file_loc_ib, '(A,I0,A)') & trim(t_step_ib_dir)//'/ib.dat' @@ -235,7 +243,8 @@ subroutine s_read_serial_data_files(t_step) FORM='unformatted', & ACTION='read', & STATUS='old') - call s_mpi_abort(trim(file_loc)//' is missing. Exiting ...') + else + call s_mpi_abort(trim(file_loc_ib)//' is missing. Exiting ...') end if end if @@ -340,6 +349,10 @@ subroutine s_read_parallel_data_files(t_step) end if end if + if (t_step == 0) then + call s_read_boundary_condition_files(trim(trim(case_dir)//'/restart_data')) + end if + if (file_per_process) then call s_int_to_str(t_step, t_step_string) ! Open the file to read conservative variables @@ -506,635 +519,6 @@ subroutine s_read_parallel_data_files(t_step) end subroutine s_read_parallel_data_files - !> The following subroutine populates the buffer regions of - !! the cell-width spacings, the cell-boundary locations and - !! the cell-center locations. Note that the buffer regions - !! of the last two variables should be interpreted slightly - !! differently than usual. They are really ghost zones that - !! are used in aiding the multidimensional visualization of - !! Silo database files, in VisIt, when processor boundary - !! conditions are present. - subroutine s_populate_grid_variables_buffer_regions - - integer :: i !< Generic loop iterator - - ! Populating Buffer Regions in the x-direction ===================== - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= -3) then - - do i = 1, buff_size - dx(-i) = dx(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == -2) then - - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == -1) then - - do i = 1, buff_size - dx(-i) = dx((m + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'x') - - end if - - do i = 1, offset_x%beg - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2d0 - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= -3) then - - do i = 1, buff_size - dx(m + i) = dx(m) - end do - - ! Symmetry BC at the end - elseif (bc_x%end == -2) then - - do i = 1, buff_size - dx(m + i) = dx((m + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_x%end == -1) then - - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'x') - - end if - - do i = 1, offset_x%end - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2d0 - end do - - ! END: Populating Buffer Regions in the x-direction ================ - - ! Populating Buffer Regions in the y-direction ===================== - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= -3 .and. bc_y%beg /= -14) then - - do i = 1, buff_size - dy(-i) = dy(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == -2 .or. bc_y%beg == -14) then - - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - - ! 
Periodic BC at the beginning - elseif (bc_y%beg == -1) then - - do i = 1, buff_size - dy(-i) = dy((n + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'y') - - end if - - do i = 1, offset_y%beg - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2d0 - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= -3) then - - do i = 1, buff_size - dy(n + i) = dy(n) - end do - - ! Symmetry BC at the end - elseif (bc_y%end == -2) then - - do i = 1, buff_size - dy(n + i) = dy((n + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_y%end == -1) then - - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'y') - - end if - - do i = 1, offset_y%end - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2d0 - end do - - ! END: Populating Buffer Regions in the y-direction ================ - - ! Populating Buffer Regions in the z-direction ===================== - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= -3) then - - do i = 1, buff_size - dz(-i) = dz(0) - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == -2) then - - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == -1) then - - do i = 1, buff_size - dz(-i) = dz((p + 1) - i) - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('beg', 'z') - - end if - - do i = 1, offset_z%beg - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2d0 - end do - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= -3) then - - do i = 1, buff_size - dz(p + i) = dz(p) - end do - - ! Symmetry BC at the end - elseif (bc_z%end == -2) then - - do i = 1, buff_size - dz(p + i) = dz((p + 1) - i) - end do - - ! Periodic BC at the end - elseif (bc_z%end == -1) then - - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_grid_vars_buffer_regions('end', 'z') - - end if - - do i = 1, offset_z%end - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2d0 - end do - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction ================ - - end subroutine s_populate_grid_variables_buffer_regions - - !> The purpose of this procedure is to populate the buffers - !! of the cell-average conservative variables, depending on - !! the boundary conditions. - subroutine s_populate_conservative_variables_buffer_regions - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction ===================== - - ! Ghost-cell extrapolation BC at the beginning - if (bc_x%beg <= -3) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = q_cons_vf(i)%sf(0, 0:n, 0:p) - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_x%beg == -2) then - - do j = 1, buff_size - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - ! 
x-component of momentum - q_cons_vf(mom_idx%beg)%sf(-j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf(j - 1, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - - end do - - ! Periodic BC at the beginning - elseif (bc_x%beg == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(-j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_x%end <= -3) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(m, 0:n, 0:p) - end do - end do - - ! Symmetry BC at the end - elseif (bc_x%end == -2) then - - do j = 1, buff_size - - ! Density or partial densities - do i = 1, cont_idx%end - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - ! x-component of momentum - q_cons_vf(mom_idx%beg)%sf(m + j, 0:n, 0:p) = & - -q_cons_vf(mom_idx%beg)%sf((m + 1) - j, 0:n, 0:p) - - ! Remaining momentum component(s), if any, as well as the - ! energy and the variable(s) from advection equation(s) - do i = mom_idx%beg + 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf((m + 1) - j, 0:n, 0:p) - end do - - end do - - ! Perodic BC at the end - elseif (bc_x%end == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(m + j, 0:n, 0:p) = & - q_cons_vf(i)%sf(j - 1, 0:n, 0:p) - end do - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - - end if - - ! END: Populating Buffer Regions in the x-direction ================ - - ! Populating Buffer Regions in the y-direction ===================== - - if (n > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_y%beg <= -3 .and. bc_y%beg /= -14) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = q_cons_vf(i)%sf(:, 0, 0:p) - end do - end do - - ! Axis BC at the beginning - elseif (bc_y%beg == -14) then - - do j = 1, buff_size - do k = 0, p - if (z_cc(k) < pi) then - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k + ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k + ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k + ((p + 1)/2)) - end do - else - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, k - ((p + 1)/2)) - - q_cons_vf(mom_idx%end)%sf(:, -j, k) = & - -q_cons_vf(mom_idx%end)%sf(:, j - 1, k - ((p + 1)/2)) - - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, -j, k) = & - q_cons_vf(i)%sf(:, j - 1, k - ((p + 1)/2)) - end do - end if - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_y%beg == -2) then - - do j = 1, buff_size - - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - ! 
y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, -j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, j - 1, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - - end do - - ! Periodic BC at the beginning - elseif (bc_y%beg == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, -j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_y%end <= -3) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, n, 0:p) - end do - end do - - ! Symmetry BC at the end - elseif (bc_y%end == -2) then - - do j = 1, buff_size - - ! Density or partial densities and x-momentum component - do i = 1, mom_idx%beg - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - ! y-component of momentum - q_cons_vf(mom_idx%beg + 1)%sf(:, n + j, 0:p) = & - -q_cons_vf(mom_idx%beg + 1)%sf(:, (n + 1) - j, 0:p) - - ! Remaining z-momentum component, if any, as well as the - ! energy and variable(s) from advection equation(s) - do i = mom_idx%beg + 2, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, (n + 1) - j, 0:p) - end do - - end do - - ! Perodic BC at the end - elseif (bc_y%end == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, n + j, 0:p) = & - q_cons_vf(i)%sf(:, j - 1, 0:p) - end do - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - - end if - - ! END: Populating Buffer Regions in the y-direction ================ - - ! Populating Buffer Regions in the z-direction ===================== - - if (p > 0) then - - ! Ghost-cell extrapolation BC at the beginning - if (bc_z%beg <= -3) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = q_cons_vf(i)%sf(:, :, 0) - end do - end do - - ! Symmetry BC at the beginning - elseif (bc_z%beg == -2) then - - do j = 1, buff_size - - ! Density or the partial densities and the momentum - ! components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, -j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, j - 1) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - - end do - - ! Periodic BC at the beginning - elseif (bc_z%beg == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, -j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - end do - - ! Processor BC at the beginning - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - - end if - - ! Ghost-cell extrapolation BC at the end - if (bc_z%end <= -3) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, p) - end do - end do - - ! Symmetry BC at the end - elseif (bc_z%end == -2) then - - do j = 1, buff_size - - ! Density or the partial densities and the momentum - ! 
components in x- and y-directions - do i = 1, mom_idx%beg + 1 - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - ! z-component of momentum - q_cons_vf(mom_idx%end)%sf(:, :, p + j) = & - -q_cons_vf(mom_idx%end)%sf(:, :, (p + 1) - j) - - ! Energy and advection equation(s) variable(s) - do i = E_idx, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, (p + 1) - j) - end do - - end do - - ! Perodic BC at the end - elseif (bc_z%end == -1) then - - do j = 1, buff_size - do i = 1, sys_size - q_cons_vf(i)%sf(:, :, p + j) = & - q_cons_vf(i)%sf(:, :, j - 1) - end do - end do - - ! Processor BC at the end - else - - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - - end if - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction ================ - - end subroutine s_populate_conservative_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module subroutine s_initialize_data_input_module diff --git a/src/post_process/m_derived_variables.fpp b/src/post_process/m_derived_variables.fpp index e08973bd2..8f9bb0192 100644 --- a/src/post_process/m_derived_variables.fpp +++ b/src/post_process/m_derived_variables.fpp @@ -410,6 +410,7 @@ contains - fd_coeff_z(r, l)* & q_prim_vf(mom_idx%beg + 1)%sf(j, k, r + l)) else + q_sf(j, k, l) = & q_sf(j, k, l) + fd_coeff_y(r, k)* & q_prim_vf(mom_idx%end)%sf(j, r + k, l) & diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index b150f23e7..7730c7c07 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -20,6 +20,8 @@ module m_global_parameters use m_thermochem, only: num_species, species_names + use m_global_parameters_common + ! ========================================================================== implicit none @@ -109,6 +111,7 @@ module m_global_parameters logical :: mixture_err !< Mixture error limiter logical :: alt_soundspeed !< Alternate sound speed logical :: hypoelasticity !< Turn hypoelasticity on + logical :: rdma_mpi !< Turn on RDMA for MPI logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling !> @} @@ -144,14 +147,20 @@ module m_global_parameters !> @name Boundary conditions in the x-, y- and z-coordinate directions !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z + integer :: num_bc_patches + type(bc_patch_parameters) :: patch_bc(num_bc_patches_max) + type(bounds_info) :: x_domain, y_domain, z_domain !> @} logical :: parallel_io !< Format of the data files logical :: file_per_process !< output format - integer, allocatable, dimension(:) :: proc_coords !< + integer, dimension(1:3) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + integer, dimension(1:3) :: proc_nums !< + !! Processor dimensions in MPI_CART_COMM + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid @@ -331,18 +340,19 @@ contains relax = .false. relax_model = dflt_int hypoelasticity = .false. + rdma_mpi = .false. bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0d0 - bc_${DIM}$%ve${DIR}$ = 0d0 - #:endfor + bc_${DIM}$%vel_beg = 0d0 + bc_${DIM}$%vel_end = 0d0 #:endfor + call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc) + ! 
Fluids physical parameters do i = 1, num_fluids_max fluid_pp(i)%gamma = dflt_real @@ -772,8 +782,6 @@ contains num_dims = 1 + min(1, n) + min(1, p) - allocate (proc_coords(1:num_dims)) - if (parallel_io .neqv. .true.) return #ifdef MFC_MPI @@ -818,8 +826,6 @@ contains end if - deallocate (proc_coords) - deallocate (adv) #ifdef MFC_MPI diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 20c5346d2..2bfe9c91d 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -199,1214 +199,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that - !! is overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(kind(0d0)) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology ======================= - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! 
Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10d0*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting ...' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology ================== - - ! Sub-domain Global Parameters in z-direction ====================== - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == -1) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(3) > 0 .and. format == 1) then - offset_z%beg = 2 - else - offset_z%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. bc_z%end == -1) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(3) < num_procs_z - 1 .and. format == 1) then - offset_z%end = 2 - else - offset_z%end = 0 - end if - - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - ! ================================================================== - - ! Generating 2D Cartesian Processor Topology ======================= - - else - - ! 
Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting ...' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology ================== - - ! Sub-domain Global Parameters in y-direction ====================== - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == -1) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(2) > 0 .and. format == 1) then - offset_y%beg = 2 - else - offset_y%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. bc_y%end == -1) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(2) < num_procs_y - 1 .and. format == 1) then - offset_y%end = 2 - else - offset_y%end = 0 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - ! ================================================================== - - ! Generating 1D Cartesian Processor Topology ======================= - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Number of cells in undecomposed computational domain needed - ! for sub-domain reassembly during formatted data output - m_root = m - - ! 
Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! ================================================================== - - ! Sub-domain Global Parameters in x-direction ====================== - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == -1) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Ghost zone at the beginning - if (proc_coords(1) > 0 .and. format == 1 .and. n > 0) then - offset_x%beg = 2 - else - offset_x%beg = 0 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - ! Ghost zone at the end - if (proc_coords(1) < num_procs_x - 1 .and. format == 1 .and. n > 0) then - offset_x%end = 2 - else - offset_x%end = 0 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - ! ================================================================== - -#endif - - end subroutine s_mpi_decompose_computational_domain - - !> Communicates the buffer regions associated with the grid - !! variables with processors in charge of the neighboring - !! sub-domains. Note that only cell-width spacings feature - !! buffer regions so that no information relating to the - !! cell-boundary locations is communicated. - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - subroutine s_mpi_sendrecv_grid_vars_buffer_regions(pbc_loc, sweep_coord) - - character(LEN=3), intent(in) :: pbc_loc - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - ! Communications in the x-direction ================================ - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - dx(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(dx(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! 
Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(dx(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - dx(m + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(dx(m - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - dx(m + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the x-direction =========================== - - ! Communications in the y-direction ================================ - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 0, & - dy(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(dy(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(dy(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 1, & - dy(n + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(dy(n - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 0, & - dy(n + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - ! END: Communications in the y-direction =========================== - - ! Communications in the z-direction ================================ - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - dz(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(dz(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(dz(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - dz(p + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! 
Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(dz(p - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - dz(p + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - end if - - end if - - ! END: Communications in the z-direction =========================== - -#endif - - end subroutine s_mpi_sendrecv_grid_vars_buffer_regions - - !> Communicates buffer regions associated with conservative - !! variables with processors in charge of the neighboring - !! sub-domains - !! @param q_cons_vf Conservative variables - !! @param pbc_loc Processor boundary condition (PBC) location - !! @param sweep_coord Coordinate direction normal to the processor boundary - subroutine s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, pbc_loc, & - sweep_coord) - - type(scalar_field), & - dimension(sys_size), & - intent(inout) :: q_cons_vf - - character(LEN=3), intent(in) :: pbc_loc - - character, intent(in) :: sweep_coord - -#ifdef MFC_MPI - - integer :: i, j, k, l, r !< Generic loop iterators - - ! Communications in the x-direction ================================ - - if (sweep_coord == 'x') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_x%end >= 0) then - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%beg - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_x%beg >= 0) then - - ! 
Packing the data to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + sys_size*j & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%beg/bc_x%end - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_x%end - do l = 0, p - do k = 0, n - do j = m - buff_size + 1, m - do i = 1, sys_size - r = sys_size*(j - m + buff_size - 1) & - + sys_size*buff_size*k + (i - 1) & - + sys_size*buff_size*(n + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_x%end/bc_x%end - call MPI_SENDRECV(q_cons_buffer_out(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - q_cons_buffer_in(0), & - buff_size*sys_size*(n + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_x%end - do l = 0, p - do k = 0, n - do j = m + 1, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j - m - 1) & - + sys_size*buff_size*k & - + sys_size*buff_size*(n + 1)*l - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the x-direction =========================== - - ! Communications in the y-direction ================================ - - elseif (sweep_coord == 'y') then - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_y%end >= 0) then - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), MPI_DOUBLE_PRECISION, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! 
Sending/receiving the data to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), MPI_DOUBLE_PRECISION, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_y%beg - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_y%beg >= 0) then - - ! Packing the data to be sent to bc_y%beg - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)*k & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l + (i - 1) - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%beg/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), MPI_DOUBLE_PRECISION, & - bc_y%beg, 1, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! Packing the data to be sent to bc_y%end - do l = 0, p - do k = n - buff_size + 1, n - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n + buff_size - 1) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - buff_size*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_y%end/bc_y%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (p + 1), MPI_DOUBLE_PRECISION, & - bc_y%end, 0, q_cons_buffer_in(0), & - buff_size*sys_size* & - (m + 2*buff_size + 1)*(p + 1), & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received form bc_y%end - do l = 0, p - do k = n + 1, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = (i - 1) + sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k - n - 1) + sys_size* & - (m + 2*buff_size + 1)*buff_size*l - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - ! END: Communications in the y-direction =========================== - - ! Communications in the z-direction ================================ - - else - - if (pbc_loc == 'beg') then ! Buffer region at the beginning - - ! PBC at both ends of the sub-domain - if (bc_z%end >= 0) then - - ! 
Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at beginning of the sub-domain - else - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data from bc_z%beg - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l + buff_size) - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - else ! Buffer region at the end - - ! PBC at both ends of the sub-domain - if (bc_z%beg >= 0) then - - ! Packing the data to be sent to bc_z%beg - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*l - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%beg/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - ! PBC only at end of the sub-domain - else - - ! 
Packing the data to be sent to bc_z%end - do l = p - buff_size + 1, p - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + sys_size* & - (m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)* & - (l - p + buff_size - 1) + (i - 1) - q_cons_buffer_out(r) = & - q_cons_vf(i)%sf(j, k, l) - end do - end do - end do - end do - - ! Sending/receiving the data to/from bc_z%end/bc_z%end - call MPI_SENDRECV(q_cons_buffer_out(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - q_cons_buffer_in(0), buff_size* & - sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1), & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, & - ierr) - - end if - - ! Unpacking the data received from bc_z%end - do l = p + 1, p + buff_size - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do i = 1, sys_size - r = sys_size*(j + buff_size) & - + sys_size*(m + 2*buff_size + 1)* & - (k + buff_size) + (i - 1) & - + sys_size*(m + 2*buff_size + 1)* & - (n + 2*buff_size + 1)*(l - p - 1) - q_cons_vf(i)%sf(j, k, l) = q_cons_buffer_in(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - end if - - end if - - ! END: Communications in the z-direction =========================== - -#endif - - end subroutine s_mpi_sendrecv_cons_vars_buffer_regions - !> This subroutine gathers the Silo database metadata for !! the spatial extents in order to boost the performance of !! the multidimensional visualization. diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 301e385d3..97e7062dc 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -39,6 +39,8 @@ module m_start_up use m_finite_differences + use m_boundary_conditions_common + ! ========================================================================== implicit none @@ -66,9 +68,12 @@ subroutine s_read_input_file namelist /user_inputs/ case_dir, m, n, p, t_step_start, & t_step_stop, t_step_save, model_eqns, & num_fluids, mpp_lim, & - weno_order, bc_x, & - bc_y, bc_z, fluid_pp, format, precision, & - hypoelasticity, G, & + weno_order, & + bc_x, bc_y, bc_z, & + num_bc_patches, patch_bc, & + fluid_pp, format, precision, & + x_domain, y_domain, z_domain, & + hypoelasticity, rdma_mpi, G, & chem_wrt_Y, chem_wrt_T, avg_state, & alpha_rho_wrt, rho_wrt, mom_wrt, vel_wrt, & E_wrt, pres_wrt, alpha_wrt, gamma_wrt, & @@ -170,12 +175,12 @@ subroutine s_perform_time_step(t_step) call s_read_data_files(t_step) ! Populating the buffer regions of the grid variables if (buff_size > 0) then - call s_populate_grid_variables_buffer_regions() + call s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs) end if ! Populating the buffer regions of the conservative variables if (buff_size > 0) then - call s_populate_conservative_variables_buffer_regions() + call s_populate_cons_buffers(q_cons_vf) end if ! Converting the conservative variables to the primitive ones @@ -679,13 +684,17 @@ subroutine s_initialize_modules ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() + call s_initialize_boundary_conditions_module() if (bubbles .and. 
nb > 1) then call s_simpson end if if (bubbles .and. .not. polytropic) then call s_initialize_nonpoly() end if - if (num_procs > 1) call s_initialize_mpi_proxy_module() + if (num_procs > 1) then + call s_initialize_mpi_common_module() + call s_initialize_mpi_proxy_module() + end if call s_initialize_variables_conversion_module() call s_initialize_data_input_module() call s_initialize_derived_variables_module() @@ -733,7 +742,10 @@ subroutine s_finalize_modules call s_finalize_derived_variables_module() call s_finalize_data_input_module() call s_finalize_variables_conversion_module() - if (num_procs > 1) call s_finalize_mpi_proxy_module() + if (num_procs > 1) then + call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() + end if call s_finalize_global_parameters_module() ! Finalizing the MPI environment diff --git a/src/pre_process/m_compute_levelset.fpp b/src/pre_process/m_compute_levelset.fpp index 18faf7a46..499a88cc9 100644 --- a/src/pre_process/m_compute_levelset.fpp +++ b/src/pre_process/m_compute_levelset.fpp @@ -315,23 +315,25 @@ contains if (side_dists(2) == 0) then levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0 else - levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(2)/ & + levelset_norm%sf(i, j, 0, ib_patch_id, 1) = -side_dists(2)/ & abs(side_dists(2)) end if else if (min_dist == abs(side_dists(3))) then + levelset%sf(i, j, 0, ib_patch_id) = side_dists(3) if (side_dists(3) == 0) then - levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0 + levelset_norm%sf(i, j, 0, ib_patch_id, 2) = 0d0 else - levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(3)/ & + levelset_norm%sf(i, j, 0, ib_patch_id, 2) = side_dists(3)/ & abs(side_dists(3)) end if else if (min_dist == abs(side_dists(4))) then + levelset%sf(i, j, 0, ib_patch_id) = side_dists(4) if (side_dists(4) == 0) then - levelset_norm%sf(i, j, 0, ib_patch_id, 1) = 0d0 + levelset_norm%sf(i, j, 0, ib_patch_id, 2) = 0d0 else - levelset_norm%sf(i, j, 0, ib_patch_id, 1) = side_dists(4)/ & + levelset_norm%sf(i, j, 0, ib_patch_id, 2) = -side_dists(4)/ & abs(side_dists(4)) end if diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 8c6147ade..541b49eba 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -32,6 +32,8 @@ module m_data_output use m_thermochem, only: species_names + use m_boundary_conditions_common + ! ========================================================================== implicit none @@ -124,6 +126,8 @@ contains integer :: i, j, k, l, r, c, dir !< Generic loop iterator integer :: t_step + integer :: iter_dir, iter_loc + real(kind(0d0)), dimension(nb) :: nRtmp !< Temporary bubble concentration real(kind(0d0)) :: nbub !< Temporary bubble number density real(kind(0d0)) :: gamma, lit_gamma, pi_inf, qv !< Temporary EOS params @@ -145,6 +149,9 @@ contains status = 'new' end if + ! Boundary Conditions + call s_write_boundary_condition_files(t_step_dir) + ! x-coordinate direction file_loc = trim(t_step_dir)//'/x_cb.dat' open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status) @@ -561,6 +568,8 @@ contains ! 
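The sign corrections in the m_compute_levelset.fpp hunk above reduce to a guarded sign evaluation: each level-set normal component is plus or minus side_dist/|side_dist|, with 0 returned when the point lies exactly on that side. A small Python sketch of that guard (the helper name guarded_sign is illustrative only):

    def guarded_sign(side_dist, flip=False):
        # 0 on the side itself; otherwise the unit sign, negated for the
        # sides whose outward normal points the other way.
        if side_dist == 0:
            return 0.0
        s = side_dist / abs(side_dist)
        return -s if flip else s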
Generic loop iterator integer :: i + call s_write_boundary_condition_files(trim(case_dir)//'/restart_data') + if (file_per_process) then if (proc_rank == 0) then file_loc = trim(case_dir)//'/restart_data/lustre_0' diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index ef240f21d..a4ae6b801 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -20,6 +20,8 @@ module m_global_parameters use m_thermochem, only: num_species + use m_global_parameters_common + ! ========================================================================== implicit none @@ -92,6 +94,8 @@ module m_global_parameters logical :: hypoelasticity !< activate hypoelasticity logical, parameter :: chemistry = .${chemistry}$. !< Chemistry modeling + logical :: rdma_mpi !< Use RDMA for MPI communication + ! Annotations of the structure, i.e. the organization, of the state vectors type(int_bounds_info) :: cont_idx !< Indexes of first & last continuity eqns. type(int_bounds_info) :: mom_idx !< Indexes of first & last momentum eqns. @@ -118,6 +122,8 @@ module m_global_parameters type(int_bounds_info) :: idwbuff(1:3) type(int_bounds_info) :: bc_x, bc_y, bc_z !< + integer :: num_bc_patches + type(bc_patch_parameters) :: patch_bc(num_bc_patches_max) !! Boundary conditions in the x-, y- and z-coordinate directions logical :: parallel_io !< Format of the data files @@ -139,9 +145,12 @@ module m_global_parameters integer :: perturb_sph_fluid !< Fluid to be perturbed with perturb_sph flag real(kind(0d0)), dimension(num_fluids_max) :: fluid_rho - integer, allocatable, dimension(:) :: proc_coords !< + integer, dimension(1:3) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + integer, dimension(1:3) :: proc_nums + !! Processor dimensions in MPI_CART_COMM + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid @@ -309,16 +318,15 @@ contains weno_order = dflt_int hypoelasticity = .false. + rdma_mpi = .false. bc_x%beg = dflt_int; bc_x%end = dflt_int bc_y%beg = dflt_int; bc_y%end = dflt_int bc_z%beg = dflt_int; bc_z%end = dflt_int #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0d0 - bc_${DIM}$%ve${DIR}$ = 0d0 - #:endfor + bc_${DIM}$%vel_beg = 0d0 + bc_${DIM}$%vel_end = 0d0 #:endfor parallel_io = .false. @@ -386,6 +394,8 @@ contains end if end do + call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc) + ! Tait EOS rhoref = dflt_real pref = dflt_real @@ -793,8 +803,6 @@ contains num_dims = 1 + min(1, n) + min(1, p) - allocate (proc_coords(1:num_dims)) - if (parallel_io .neqv. .true.) return #ifdef MFC_MPI @@ -830,8 +838,6 @@ contains end if end if - deallocate (proc_coords) - #ifdef MFC_MPI if (parallel_io) then diff --git a/src/pre_process/m_initial_condition.fpp b/src/pre_process/m_initial_condition.fpp index dc5182906..13848be7e 100644 --- a/src/pre_process/m_initial_condition.fpp +++ b/src/pre_process/m_initial_condition.fpp @@ -142,7 +142,7 @@ contains do i = 1, num_patches if (proc_rank == 0) then - print *, 'Processing patch', i + print *, 'Processing 3D patch', i end if !> ICPP Patches @@ -218,7 +218,7 @@ contains do i = 1, num_patches if (proc_rank == 0) then - print *, 'Processing patch', i + print *, 'Processing 2D patch', i end if !> ICPP Patches @@ -296,7 +296,7 @@ contains do i = 1, num_patches if (proc_rank == 0) then - print *, 'Processing patch', i + print *, 'Processing 1D patch', i end if ! 
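The #:for DIM in ['x', 'y', 'z'] block in the m_global_parameters.fpp hunk above is a fypp compile-time loop; since fypp is Python-based, its expansion can be previewed with an ordinary Python loop. A rough rendering of the Fortran text that the default-value loop generates:

    # Preview of the text emitted by the fypp default-value loop above.
    for dim in ("x", "y", "z"):
        print(f"bc_{dim}%vel_beg = 0d0")
        print(f"bc_{dim}%vel_end = 0d0")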
Line segment patch diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index 33895c7c4..b84610bd9 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -72,6 +72,25 @@ contains call MPI_BCAST(${VAR}$, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) #:endfor + #:for VAR in ['x', 'y', 'z'] + call MPI_BCAST(bc_${VAR}$%vel_beg, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + call MPI_BCAST(bc_${VAR}$%vel_end, 3, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + #:endfor + + do i = 1, num_bc_patches_max + #:for VAR in [ 'type', 'dir', 'loc', 'geometry' ] + call MPI_BCAST(patch_bc(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'centroid', 'length', 'vel' ] + call MPI_BCAST(patch_bc(i)%${VAR}$, size(patch_bc(i)%${VAR}$), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + #:endfor + + #:for VAR in [ 'radius' ] + call MPI_BCAST(patch_bc(i)%${VAR}$, 1, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + #:endfor + end do + do i = 1, num_patches_max #:for VAR in [ 'geometry', 'smooth_patch_id'] call MPI_BCAST(patch_icpp(i)%${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) @@ -116,421 +135,4 @@ contains end subroutine s_mpi_bcast_user_inputs - !> Description: This subroutine takes care of efficiently distributing - !! the computational domain among the available processors - !! as well as recomputing some of the global parameters so - !! that they reflect the configuration of sub-domain that is - !! overseen by the local processor. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - ! # of processors in the x-, y- and z-coordinate directions - integer :: num_procs_x, num_procs_y, num_procs_z - - ! Temporary # of processors in x-, y- and z-coordinate directions - ! used during the processor factorization optimization procedure - real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z - - ! Processor factorization (fct) minimization parameter - real(kind(0d0)) :: fct_min - - ! Cartesian processor topology communicator - integer :: MPI_COMM_CART - - ! Number of remaining cells for a particular coordinate direction - ! after the bulk has evenly been distributed among the available - ! processors for that coordinate direction - integer :: rem_cells - - ! Generic loop iterators - integer :: i, j - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! Performing the computational domain decomposition. The procedure - ! is optimized by ensuring that each processor contains a close to - ! equivalent piece of the computational domain. Note that explicit - ! type-casting is omitted here for code legibility purposes. - - ! Generating 3D Cartesian Processor Topology ======================= - - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! 
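The broadcast loop added above fixes which per-patch fields exist on every rank: the integers type, dir, loc and geometry, the arrays centroid, length and vel, and the scalar radius, for up to num_bc_patches_max patches. The exact input keys accepted by the toolchain are not shown in this hunk, so the case-dictionary fragment below is only a hypothetical sketch of such an entry, patterned after the existing patch_icpp(...) key style and the field names broadcast here:

    # Hypothetical case-file fragment; key names are assumptions, not confirmed
    # by this hunk. BC code -16 corresponds to the no-slip wall case used below.
    case = {
        'num_bc_patches': 1,
        'patch_bc(1)%type': -16,      # boundary condition applied inside the patch
        'patch_bc(1)%dir': 1,         # x-direction boundary
        'patch_bc(1)%loc': -1,        # beginning face of that direction
        'patch_bc(1)%geometry': 1,    # patch shape identifier
        'patch_bc(1)%centroid(2)': 0.5,
        'patch_bc(1)%length(2)': 0.25,
    }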
Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10d0*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, (num_procs/i) - - if (mod(num_procs/i, j) == 0 & - .and. & - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 - - end if - - end if - - end do - - end if - - end do - - end if - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting ...' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - - ! END: Generating 3D Cartesian Processor Topology ================== - - ! Sub-domain Global Parameters in z-direction ====================== - - ! Number of remaining cells after majority is distributed - rem_cells = mod(p + 1, num_procs_z) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dz = (z_domain%end - z_domain%beg)/real(p + 1, kind(0d0)) - end if - - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1 - exit - end if - end do - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) 
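The removed s_mpi_decompose_computational_domain searches all factorizations of num_procs and keeps the one that best balances cells per rank, while guaranteeing every rank at least num_stcls_min*weno_order cells in each split direction. A compact Python sketch of the 2-D version of that search (real-valued imbalance metric, integer minimum-cell checks, as in the Fortran; returning None mirrors the ierr == -1 "unable to decompose" abort):

    def decompose_2d(num_procs, m, n, min_cells):
        # (m + 1) x (n + 1) cells split over num_procs ranks as px * py.
        best = None
        fct_min = 10.0 * abs((m + 1) / 1.0 - (n + 1) / num_procs)
        for px in range(1, num_procs + 1):
            if num_procs % px or (m + 1) // px < min_cells:
                continue
            py = num_procs // px
            fct = abs((m + 1) / px - (n + 1) / py)
            if fct <= fct_min and (n + 1) // py >= min_cells:
                best, fct_min = (px, py), fct
        return best

    print(decompose_2d(num_procs=8, m=199, n=99, min_cells=10))  # (4, 2)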
then - if (proc_coords(3) < rem_cells) then - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3)) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1) & - - (num_procs_z - rem_cells)) - else - z_domain%beg = z_domain%beg + dz*real((p + 1)* & - proc_coords(3) + rem_cells) - z_domain%end = z_domain%end - dz*real((p + 1)* & - (num_procs_z - proc_coords(3) - 1)) - end if - end if - else - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells - end if - end if - - ! ================================================================== - - ! Generating 2D Cartesian Processor Topology ======================= - - else - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do - - ! Checking whether the decomposition of the computational - ! domain was successful - if (proc_rank == 0 .and. ierr == -1) then - print '(A)', 'Unable to decompose computational '// & - 'domain for selected number of '// & - 'processors. Exiting ...' - call MPI_ABORT(MPI_COMM_WORLD, 1, ierr) - end if - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding corresponding Cartesian coordinates of the local - ! processor rank in newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - - ! END: Generating 2D Cartesian Processor Topology ================== - - ! Sub-domain Global Parameters in y-direction ====================== - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(n + 1, num_procs_y) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dy = (y_domain%end - y_domain%beg)/real(n + 1, kind(0d0)) - end if - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1 - exit - end if - end do - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) 
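In each direction the cells are divided evenly, the first rem_cells ranks absorb one extra cell apiece, and start_idx records where the local block begins in the global grid (ranks past the remainder are offset by rem_cells). The same pattern repeats below for the y- and x-directions; a small Python sketch:

    def local_extent(total_cells, num_ranks, coord):
        # Even split with the first `rem` ranks getting one extra cell.
        base, rem = divmod(total_cells, num_ranks)
        ncells = base + (1 if coord < rem else 0)
        start = coord * base + min(coord, rem)  # global index of the first local cell
        return ncells, start

    # 10 cells over 4 ranks -> (3, 0), (3, 3), (2, 6), (2, 8)
    print([local_extent(10, 4, c) for c in range(4)])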
then - if (proc_coords(2) < rem_cells) then - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2)) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1) & - - (num_procs_y - rem_cells)) - else - y_domain%beg = y_domain%beg + dy*real((n + 1)* & - proc_coords(2) + rem_cells) - y_domain%end = y_domain%end - dy*real((n + 1)* & - (num_procs_y - proc_coords(2) - 1)) - end if - end if - else - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! ================================================================== - - ! Generating 1D Cartesian Processor Topology ======================= - - else - - ! Number of processors in the coordinate direction is equal to - ! the total number of processors available - num_procs_x = num_procs - - ! Creating a new communicator using Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the corresponding Cartesian coordinates of the local - ! processor rank in the newly declared cartesian communicator - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - - ! ================================================================== - - ! Sub-domain Global Parameters in x-direction ====================== - - ! Number of remaining cells after majority has been distributed - rem_cells = mod(m + 1, num_procs_x) - - ! Preliminary uniform cell-width spacing - if (old_grid .neqv. .true.) then - dx = (x_domain%end - x_domain%beg)/real(m + 1, kind(0d0)) - end if - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing any remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1 - exit - end if - end do - - ! Beginning and end sub-domain boundary locations - if (parallel_io .neqv. .true.) then - if (old_grid .neqv. .true.) then - if (proc_coords(1) < rem_cells) then - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1)) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1) & - - (num_procs_x - rem_cells)) - else - x_domain%beg = x_domain%beg + dx*real((m + 1)* & - proc_coords(1) + rem_cells) - x_domain%end = x_domain%end - dx*real((m + 1)* & - (num_procs_x - proc_coords(1) - 1)) - end if - end if - else - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - - ! ================================================================== - -#endif - - end subroutine s_mpi_decompose_computational_domain - end module m_mpi_proxy diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 3d7843259..6a0738438 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -50,6 +50,8 @@ module m_start_up use m_checker_common use m_checker + + use m_boundary_conditions_common ! 
========================================================================== implicit none @@ -128,8 +130,8 @@ contains stretch_x, stretch_y, stretch_z, a_x, a_y, & a_z, x_a, y_a, z_a, x_b, y_b, z_b, & model_eqns, num_fluids, mpp_lim, & - weno_order, bc_x, bc_y, bc_z, num_patches, & - hypoelasticity, patch_icpp, fluid_pp, precision, parallel_io, & + weno_order, bc_x, bc_y, bc_z, patch_bc, num_patches, num_bc_patches, & + hypoelasticity, rdma_mpi, patch_icpp, fluid_pp, precision, parallel_io, & mixlayer_vel_profile, mixlayer_vel_coef, mixlayer_domain, & mixlayer_perturb, & pi_fac, perturb_flow, perturb_flow_fluid, perturb_flow_mag, & @@ -735,6 +737,8 @@ contains call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting... ') end if + call s_read_boundary_condition_files(restart_dir) + if (ib) then write (file_loc, '(A)') 'ib.dat' @@ -768,6 +772,8 @@ contains ! Computation of parameters, allocation procedures, and/or any other tasks ! needed to properly setup the modules call s_initialize_global_parameters_module() + call s_initialize_boundary_conditions_module() + !Quadrature weights and nodes for polydisperse simulations if (bubbles .and. nb > 1) then call s_simpson @@ -823,6 +829,8 @@ contains call s_read_grid_data_files() call s_check_grid_data_files() end if + + call s_generate_boundary_condition_patch_buffers() end if end subroutine s_read_grid @@ -922,6 +930,7 @@ contains call s_mpi_bcast_user_inputs() call s_initialize_parallel_io() call s_mpi_decompose_computational_domain() + end subroutine s_initialize_mpi_domain subroutine s_finalize_modules diff --git a/src/simulation/m_boundary_conditions.fpp b/src/simulation/m_boundary_conditions.fpp index 266be8ed0..85a85da31 100644 --- a/src/simulation/m_boundary_conditions.fpp +++ b/src/simulation/m_boundary_conditions.fpp @@ -2,6 +2,8 @@ !! @file m_boundary_conditions.fpp !! @brief Contains module m_boundary_conditions +#:include 'macros.fpp' + !> @brief The purpose of the module is to apply noncharacteristic and processor !! boundary condiitons module m_boundary_conditions @@ -11,1628 +13,49 @@ module m_boundary_conditions use m_global_parameters !< Definitions of the global parameters - use m_mpi_proxy + use m_mpi_proxy !< Definitions of the MPI proxy use m_constants + + use m_boundary_conditions_common + ! ========================================================================== implicit none private; - public :: s_populate_variables_buffers, & - s_populate_capillary_buffers + public :: s_populate_prim_buffers, & + s_populate_capillary_buffers, & + s_initialize_boundary_conditions_module contains - !> The purpose of this procedure is to populate the buffers - !! of the primitive variables, depending on the selected - !! boundary conditions. - subroutine s_populate_variables_buffers(q_prim_vf, pb, mv) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - - integer :: bc_loc, bc_dir - - ! Population of Buffers in x-direction ============================= - - select case (bc_x%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, -1) - case (-2) ! Symmetry BC at beginning - call s_symmetry(q_prim_vf, pb, mv, 1, -1) - case (-1) ! Periodic BC at beginning - call s_periodic(q_prim_vf, pb, mv, 1, -1) - case (-15) ! Slip wall BC at beginning - call s_slip_wall(q_prim_vf, pb, mv, 1, -1) - case (-16) ! 
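In the new s_populate_capillary_buffers above, #:for dir, loc in itertools.product([1, 2, 3], [-1, 1]) is a fypp compile-time loop: it stamps out one copy of the block for each of the six (direction, side) pairs, and the generated if (${dir}$ <= num_dims) guard keeps the MPI exchange from running for directions that do not exist in lower-dimensional runs. The pair list it iterates over is simply:

    import itertools
    print(list(itertools.product([1, 2, 3], [-1, 1])))
    # [(1, -1), (1, 1), (2, -1), (2, 1), (3, -1), (3, 1)]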
No-slip wall BC at beginning - call s_no_slip_wall(q_prim_vf, pb, mv, 1, -1) - case default ! Processor BC at beginning - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, -1) - end select - - select case (bc_x%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 1, 1) - case (-2) ! Symmetry BC at end - call s_symmetry(q_prim_vf, pb, mv, 1, 1) - case (-1) ! Periodic BC at end - call s_periodic(q_prim_vf, pb, mv, 1, 1) - case (-15) ! Slip wall BC at end - call s_slip_wall(q_prim_vf, pb, mv, 1, 1) - case (-16) ! No-slip wall bc at end - call s_no_slip_wall(q_prim_vf, pb, mv, 1, 1) - case default ! Processor BC at end - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 1, 1) - end select - - if (qbmm .and. .not. polytropic) then - select case (bc_x%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_qbmm_extrapolation(pb, mv, 1, -1) - case (-15) ! Slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 1, -1) - case (-16) ! No-slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 1, -1) - end select - - select case (bc_x%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_qbmm_extrapolation(pb, mv, 1, 1) - case (-15) ! Slip wall BC at end - call s_qbmm_extrapolation(pb, mv, 1, 1) - case (-16) ! No-slip wall bc at end - call s_qbmm_extrapolation(pb, mv, 1, 1) - end select - end if - - ! END: Population of Buffers in x-direction ======================== - - ! Population of Buffers in y-direction ============================= - - if (n == 0) return - - select case (bc_y%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, -1) - case (-14) ! Axis BC at beginning - call s_axis(q_prim_vf, pb, mv, 2, -1) - case (-2) ! Symmetry BC at beginning - call s_symmetry(q_prim_vf, pb, mv, 2, -1) - case (-1) ! Periodic BC at beginning - call s_periodic(q_prim_vf, pb, mv, 2, -1) - case (-15) ! Slip wall BC at beginning - call s_slip_wall(q_prim_vf, pb, mv, 2, -1) - case (-16) ! No-slip wall BC at beginning - call s_no_slip_wall(q_prim_vf, pb, mv, 2, -1) - case default ! Processor BC at beginning - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, -1) - end select - - select case (bc_y%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 2, 1) - case (-2) ! Symmetry BC at end - call s_symmetry(q_prim_vf, pb, mv, 2, 1) - case (-1) ! Periodic BC at end - call s_periodic(q_prim_vf, pb, mv, 2, 1) - case (-15) ! Slip wall BC at end - call s_slip_wall(q_prim_vf, pb, mv, 2, 1) - case (-16) ! No-slip wall BC at end - call s_no_slip_wall(q_prim_vf, pb, mv, 2, 1) - case default ! Processor BC at end - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 2, 1) - end select - - if (qbmm .and. .not. polytropic) then - - select case (bc_y%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_qbmm_extrapolation(pb, mv, 2, -1) - case (-15) ! Slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 2, -1) - case (-16) ! No-slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 2, -1) - end select - - select case (bc_y%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_qbmm_extrapolation(pb, mv, 2, 1) - case (-15) ! Slip wall BC at end - call s_qbmm_extrapolation(pb, mv, 2, 1) - case (-16) ! No-slip wall BC at end - call s_qbmm_extrapolation(pb, mv, 2, 1) - end select - - end if - - ! 
END: Population of Buffers in y-direction ======================== - - ! Population of Buffers in z-direction ============================= - - if (p == 0) return - - select case (bc_z%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, -1) - case (-2) ! Symmetry BC at beginning - call s_symmetry(q_prim_vf, pb, mv, 3, -1) - case (-1) ! Periodic BC at beginning - call s_periodic(q_prim_vf, pb, mv, 3, -1) - case (-15) ! Slip wall BC at beginning - call s_slip_wall(q_prim_vf, pb, mv, 3, -1) - case (-16) ! No-slip wall BC at beginning - call s_no_slip_wall(q_prim_vf, pb, mv, 3, -1) - case default ! Processor BC at beginning - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, -1) - end select - - select case (bc_z%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_ghost_cell_extrapolation(q_prim_vf, pb, mv, 3, 1) - case (-2) ! Symmetry BC at end - call s_symmetry(q_prim_vf, pb, mv, 3, 1) - case (-1) ! Periodic BC at end - call s_periodic(q_prim_vf, pb, mv, 3, 1) - case (-15) ! Slip wall BC at end - call s_slip_wall(q_prim_vf, pb, mv, 3, 1) - case (-16) ! No-slip wall BC at end - call s_no_slip_wall(q_prim_vf, pb, mv, 3, 1) - case default ! Processor BC at end - call s_mpi_sendrecv_variables_buffers( & - q_prim_vf, pb, mv, 3, 1) - end select - - if (qbmm .and. .not. polytropic) then - - select case (bc_z%beg) - case (-13:-3) ! Ghost-cell extrap. BC at beginning - call s_qbmm_extrapolation(pb, mv, 3, -1) - case (-15) ! Slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 3, -1) - case (-16) ! No-slip wall BC at beginning - call s_qbmm_extrapolation(pb, mv, 3, -1) - end select - - select case (bc_z%end) - case (-13:-3) ! Ghost-cell extrap. BC at end - call s_qbmm_extrapolation(pb, mv, 3, 1) - case (-15) ! Slip wall BC at end - call s_qbmm_extrapolation(pb, mv, 3, 1) - case (-16) ! No-slip wall BC at end - call s_qbmm_extrapolation(pb, mv, 3, 1) - end select - - end if - - ! 
END: Population of Buffers in z-direction ======================== - - end subroutine s_populate_variables_buffers - - subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then !< x-direction - - if (bc_loc == -1) then !bc_x%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(0, k, l) - end do - end do - end do - end do - - else !< bc_x%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(m, k, l) - end do - end do - end do - end do - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then !< y-direction - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, 0, k) - end do - end do - end do - end do - - else !< bc_y%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, n, k) - end do - end do - end do - end do - - end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then !< z-direction - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, 0) - end do - end do - end do - end do - - else !< bc_z%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, p) - end do - end do - end do - end do - - end if - - end if - !< ===================================================================== - - end subroutine s_ghost_cell_extrapolation - - subroutine s_symmetry(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then - - if (bc_loc == -1) then !< bc_x%beg - - !$acc parallel loop collapse(3) gang vector default(present) - do l = 0, p - do k = 0, n - do j = 1, buff_size - !$acc loop seq - do i = 1, contxe - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(j - 1, k, l) - end do - - q_prim_vf(momxb)%sf(-j, k, l) = & - -q_prim_vf(momxb)%sf(j - 1, k, l) - - !$acc loop seq - do i = momxb + 1, sys_size - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(j - 1, k, l) - end do - end do - end do - end do - - if (qbmm .and. .not. 
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(-j, k, l, q, i) = & - pb(j - 1, k, l, q, i) - mv(-j, k, l, q, i) = & - mv(j - 1, k, l, q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_x%end - - !$acc parallel loop collapse(3) default(present) - do l = 0, p - do k = 0, n - do j = 1, buff_size - - !$acc loop seq - do i = 1, contxe - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(m - (j - 1), k, l) - end do - - q_prim_vf(momxb)%sf(m + j, k, l) = & - -q_prim_vf(momxb)%sf(m - (j - 1), k, l) - - !$acc loop seq - do i = momxb + 1, sys_size - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(m - (j - 1), k, l) - end do - - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(m + j, k, l, q, i) = & - pb(m - (j - 1), k, l, q, i) - mv(m + j, k, l, q, i) = & - mv(m - (j - 1), k, l, q, i) - end do - end do - end do - end do - end do - end if - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - !$acc loop seq - do i = 1, momxb - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k) - end do - - q_prim_vf(momxb + 1)%sf(l, -j, k) = & - -q_prim_vf(momxb + 1)%sf(l, j - 1, k) - - !$acc loop seq - do i = momxb + 2, sys_size - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, -j, k, q, i) = & - pb(l, j - 1, k, q, i) - mv(l, -j, k, q, i) = & - mv(l, j - 1, k, q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_y%end - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - !$acc loop seq - do i = 1, momxb - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, n - (j - 1), k) - end do - - q_prim_vf(momxb + 1)%sf(l, n + j, k) = & - -q_prim_vf(momxb + 1)%sf(l, n - (j - 1), k) - - !$acc loop seq - do i = momxb + 2, sys_size - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, n - (j - 1), k) - end do - end do - end do - end do - - if (qbmm .and. .not. 
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, n + j, k, q, i) = & - pb(l, n - (j - 1), k, q, i) - mv(l, n + j, k, q, i) = & - mv(l, n - (j - 1), k, q, i) - end do - end do - end do - end do - end do - end if - - end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(3) gang vector default(present) - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - !$acc loop seq - do i = 1, momxb + 1 - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, j - 1) - end do - - q_prim_vf(momxe)%sf(k, l, -j) = & - -q_prim_vf(momxe)%sf(k, l, j - 1) - - !$acc loop seq - do i = E_idx, sys_size - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, j - 1) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, -j, q, i) = & - pb(k, l, j - 1, q, i) - mv(k, l, -j, q, i) = & - mv(k, l, j - 1, q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_z%end - - !$acc parallel loop collapse(3) gang vector default(present) - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - !$acc loop seq - do i = 1, momxb + 1 - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, p - (j - 1)) - end do - - q_prim_vf(momxe)%sf(k, l, p + j) = & - -q_prim_vf(momxe)%sf(k, l, p - (j - 1)) - - !$acc loop seq - do i = E_idx, sys_size - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, p - (j - 1)) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, p + j, q, i) = & - pb(k, l, p - (j - 1), q, i) - mv(k, l, p + j, q, i) = & - mv(k, l, p - (j - 1), q, i) - end do - end do - end do - end do - end do - end if - - end if - - end if - !< ===================================================================== - - end subroutine s_symmetry - - subroutine s_periodic(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then - - if (bc_loc == -1) then !< bc_x%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(m - (j - 1), k, l) - end do - end do - end do - end do - - if (qbmm .and. .not. 
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(-j, k, l, q, i) = & - pb(m - (j - 1), k, l, q, i) - mv(-j, k, l, q, i) = & - mv(m - (j - 1), k, l, q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_x%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(j - 1, k, l) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(m + j, k, l, q, i) = & - pb(j - 1, k, l, q, i) - mv(m + j, k, l, q, i) = & - mv(j - 1, k, l, q, i) - end do - end do - end do - end do - end do - end if - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, n - (j - 1), k) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, -j, k, q, i) = & - pb(l, n - (j - 1), k, q, i) - mv(l, -j, k, q, i) = & - mv(l, n - (j - 1), k, q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_y%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, j - 1, k) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, n + j, k, q, i) = & - pb(l, (j - 1), k, q, i) - mv(l, n + j, k, q, i) = & - mv(l, (j - 1), k, q, i) - end do - end do - end do - end do - end do - end if - - end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, p - (j - 1)) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, -j, q, i) = & - pb(k, l, p - (j - 1), q, i) - mv(k, l, -j, q, i) = & - mv(k, l, p - (j - 1), q, i) - end do - end do - end do - end do - end do - end if - - else !< bc_z%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, j - 1) - end do - end do - end do - end do - - if (qbmm .and. .not. 
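The routines being removed here, now handled by the generated fills of the common boundary-condition module, implement three basic ghost-cell treatments: ghost-cell extrapolation copies the boundary cell outward (codes -13..-3), symmetry mirrors the interior and negates the wall-normal velocity (code -2), and periodic wraps values from the opposite end (code -1). A simplified 1-D NumPy sketch of the beginning-side versions of these fills:

    import numpy as np

    def fill_beg(f, buff, kind, flip=False):
        # f holds the interior cells 0..m; the returned array prepends the
        # ghost cells -buff..-1 filled according to `kind`.
        if kind == "extrapolation":      # copy the first interior cell
            ghost = np.full(buff, f[0])
        elif kind == "symmetry":         # mirror; flip=True negates (normal velocity)
            ghost = f[:buff][::-1] * (-1.0 if flip else 1.0)
        elif kind == "periodic":         # wrap values from the far end
            ghost = f[-buff:]
        else:
            raise ValueError(kind)
        return np.concatenate([ghost, f])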
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, p + j, q, i) = & - pb(k, l, j - 1, q, i) - mv(k, l, p + j, q, i) = & - mv(k, l, j - 1, q, i) - end do - end do - end do - end do - end do - end if - - end if - - end if - !< ===================================================================== - - end subroutine s_periodic - - subroutine s_axis(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (z_cc(k) < pi) then - !$acc loop seq - do i = 1, momxb - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k + ((p + 1)/2)) - end do + subroutine s_populate_capillary_buffers(c_divs) - q_prim_vf(momxb + 1)%sf(l, -j, k) = & - -q_prim_vf(momxb + 1)%sf(l, j - 1, k + ((p + 1)/2)) + type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs - q_prim_vf(momxe)%sf(l, -j, k) = & - -q_prim_vf(momxe)%sf(l, j - 1, k + ((p + 1)/2)) + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() - !$acc loop seq - do i = E_idx, sys_size - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k + ((p + 1)/2)) - end do + #:for dir, loc in itertools.product([1, 2, 3], [-1, 1]) + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc=loc, outer_loops=[("i", 1, "num_dims + 1")]) + select case (bc_id_sfs(${dir}$, ${loc}$)%sf(exlhs, eylhs, ezlhs)%type) + case (-13:-3); + c_divs(i)%sf(x, y, z) = c_divs(i)%sf(ex, ey, ez) + case (-2); !< slip wall or reflective + if (i == 1) then + c_divs(i)%sf(x, y, z) = -c_divs(i)%sf(sx, sy, sz) else - !$acc loop seq - do i = 1, momxb - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k - ((p + 1)/2)) - end do - - q_prim_vf(momxb + 1)%sf(l, -j, k) = & - -q_prim_vf(momxb + 1)%sf(l, j - 1, k - ((p + 1)/2)) - - q_prim_vf(momxe)%sf(l, -j, k) = & - -q_prim_vf(momxe)%sf(l, j - 1, k - ((p + 1)/2)) - - !$acc loop seq - do i = E_idx, sys_size - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, j - 1, k - ((p + 1)/2)) - end do + c_divs(i)%sf(x, y, z) = +c_divs(i)%sf(sx, sy, sz) end if - end do - end do - end do - - if (qbmm .and. .not. 
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, -j, k, q, i) = & - pb(l, j - 1, k - ((p + 1)/2), q, i) - mv(l, -j, k, q, i) = & - mv(l, j - 1, k - ((p + 1)/2), q, i) - end do - end do - end do - end do - end do - end if - - end subroutine s_axis - - subroutine s_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then - - if (bc_loc == -1) then !< bc_x%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == momxb) then - q_prim_vf(i)%sf(-j, k, l) = & - -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb1 - else - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(0, k, l) - end if - end do - end do - end do - end do - - else !< bc_x%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == momxb) then - q_prim_vf(i)%sf(m + j, k, l) = & - -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve1 - else - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(m, k, l) - end if - end do - end do - end do - end do - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == momxb + 1) then - q_prim_vf(i)%sf(l, -j, k) = & - -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb2 - else - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, 0, k) - end if - end do - end do - end do - end do - - else !< bc_y%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == momxb + 1) then - q_prim_vf(i)%sf(l, n + j, k) = & - -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve2 - else - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, n, k) - end if - end do - end do - end do - end do + case (-1); + c_divs(i)%sf(x, y, z) = c_divs(i)%sf(px, py, pz) + end select + #:endblock + if (${dir}$ <= num_dims) then + call s_mpi_sendrecv_capilary_variables_buffers(c_divs, bc_id_sfs, ${dir}$, ${loc}$) end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == momxe) then - q_prim_vf(i)%sf(k, l, -j) = & - -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb3 - else - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, 0) - end if - end do - end do - end do - end do - - else !< bc_z%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == momxe) then - q_prim_vf(i)%sf(k, l, p + j) = & - -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve3 - 
else - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, p) - end if - end do - end do - end do - end do - - end if - - end if - !< ===================================================================== - - end subroutine s_slip_wall - - subroutine s_no_slip_wall(q_prim_vf, pb, mv, bc_dir, bc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then - - if (bc_loc == -1) then !< bc_x%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == momxb) then - q_prim_vf(i)%sf(-j, k, l) = & - -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(-j, k, l) = & - -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb2 - elseif (i == momxb + 2 .and. num_dims > 2) then - q_prim_vf(i)%sf(-j, k, l) = & - -q_prim_vf(i)%sf(j - 1, k, l) + 2d0*bc_x%vb3 - else - q_prim_vf(i)%sf(-j, k, l) = & - q_prim_vf(i)%sf(0, k, l) - end if - end do - end do - end do - end do - - else !< bc_x%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == momxb) then - q_prim_vf(i)%sf(m + j, k, l) = & - -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(m + j, k, l) = & - -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve2 - elseif (i == momxb + 2 .and. num_dims > 2) then - q_prim_vf(i)%sf(m + j, k, l) = & - -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2d0*bc_x%ve3 - else - q_prim_vf(i)%sf(m + j, k, l) = & - q_prim_vf(i)%sf(m, k, l) - end if - end do - end do - end do - end do - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == momxb) then - q_prim_vf(i)%sf(l, -j, k) = & - -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(l, -j, k) = & - -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb2 - elseif (i == momxb + 2 .and. num_dims > 2) then - q_prim_vf(i)%sf(l, -j, k) = & - -q_prim_vf(i)%sf(l, j - 1, k) + 2d0*bc_y%vb3 - else - q_prim_vf(i)%sf(l, -j, k) = & - q_prim_vf(i)%sf(l, 0, k) - end if - end do - end do - end do - end do - - else !< bc_y%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == momxb) then - q_prim_vf(i)%sf(l, n + j, k) = & - -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(l, n + j, k) = & - -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve2 - elseif (i == momxb + 2 .and. 
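Both wall routines removed here impose the prescribed wall velocity through reflected ghost values: the affected velocity component is set to minus the interior value plus twice the wall velocity, so the average across the wall face equals the wall velocity (s_slip_wall applies this to the normal component only, s_no_slip_wall to every velocity component present). A one-line Python sketch of that reflection:

    def wall_ghost(v_interior, v_wall):
        # Reflect the interior velocity about the prescribed wall velocity.
        return -v_interior + 2.0 * v_wall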
num_dims > 2) then - q_prim_vf(i)%sf(l, n + j, k) = & - -q_prim_vf(i)%sf(l, n - (j - 1), k) + 2d0*bc_y%ve3 - else - q_prim_vf(i)%sf(l, n + j, k) = & - q_prim_vf(i)%sf(l, n, k) - end if - end do - end do - end do - end do - - end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == momxb) then - q_prim_vf(i)%sf(k, l, -j) = & - -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(k, l, -j) = & - -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb2 - elseif (i == momxb + 2 .and. num_dims > 2) then - q_prim_vf(i)%sf(k, l, -j) = & - -q_prim_vf(i)%sf(k, l, j - 1) + 2d0*bc_z%vb3 - else - q_prim_vf(i)%sf(k, l, -j) = & - q_prim_vf(i)%sf(k, l, 0) - end if - end do - end do - end do - end do - - else !< bc_z%end - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, sys_size - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == momxb) then - q_prim_vf(i)%sf(k, l, p + j) = & - -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve1 - elseif (i == momxb + 1 .and. num_dims > 1) then - q_prim_vf(i)%sf(k, l, p + j) = & - -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve2 - elseif (i == momxb + 2 .and. num_dims > 2) then - q_prim_vf(i)%sf(k, l, p + j) = & - -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2d0*bc_z%ve3 - else - q_prim_vf(i)%sf(k, l, p + j) = & - q_prim_vf(i)%sf(k, l, p) - end if - end do - end do - end do - end do - - end if - - end if - !< ===================================================================== - - end subroutine s_no_slip_wall - - subroutine s_qbmm_extrapolation(pb, mv, bc_dir, bc_loc) - - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv - integer, intent(in) :: bc_dir, bc_loc - - integer :: j, k, l, q, i - - !< x-direction ========================================================= - if (bc_dir == 1) then - - if (bc_loc == -1) then !< bc_x%beg - - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(-j, k, l, q, i) = & - pb(0, k, l, q, i) - mv(-j, k, l, q, i) = & - mv(0, k, l, q, i) - end do - end do - end do - end do - end do - - else !< bc_x%end - - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do l = 0, p - do k = 0, n - do j = 1, buff_size - pb(m + j, k, l, q, i) = & - pb(m, k, l, q, i) - mv(m + j, k, l, q, i) = & - mv(m, k, l, q, i) - end do - end do - end do - end do - end do - - end if - - !< y-direction ========================================================= - elseif (bc_dir == 2) then - - if (bc_loc == -1) then !< bc_y%beg - - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, -j, k, q, i) = & - pb(l, 0, k, q, i) - mv(l, -j, k, q, i) = & - mv(l, 0, k, q, i) - end do - end do - end do - end do - end do - - else !< bc_y%end - - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - pb(l, n + j, k, q, i) = & - pb(l, n, k, q, i) - mv(l, n + j, k, q, i) = & - mv(l, n, k, q, i) 
- end do - end do - end do - end do - end do - - end if - - !< z-direction ========================================================= - elseif (bc_dir == 3) then - - if (bc_loc == -1) then !< bc_z%beg - - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, -j, q, i) = & - pb(k, l, 0, q, i) - mv(k, l, -j, q, i) = & - mv(k, l, 0, q, i) - end do - end do - end do - end do - end do - - else !< bc_z%end - - !$acc parallel loop collapse(5) gang vector default(present) - do i = 1, nb - do q = 1, nnode - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - pb(k, l, p + j, q, i) = & - pb(k, l, p, q, i) - mv(k, l, p + j, q, i) = & - mv(k, l, p, q, i) - end do - end do - end do - end do - end do - - end if - - end if - - end subroutine - - subroutine s_populate_capillary_buffers(c_divs) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs - integer :: i, j, k, l - - ! x - direction - if (bc_x%beg <= -3) then !< ghost cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - c_divs(i)%sf(-j, k, l) = & - c_divs(i)%sf(0, k, l) - end do - end do - end do - end do - elseif (bc_x%beg == -2) then !< slip wall or reflective - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == 1) then - c_divs(i)%sf(-j, k, l) = & - -c_divs(i)%sf(j - 1, k, l) - else - c_divs(i)%sf(-j, k, l) = & - c_divs(i)%sf(j - 1, k, l) - end if - end do - end do - end do - end do - elseif (bc_x%beg == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - c_divs(i)%sf(-j, k, l) = & - c_divs(i)%sf(m - (j - 1), k, l) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, -1) - end if - - if (bc_x%end <= -3) then !< ghost-cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - c_divs(i)%sf(m + j, k, l) = & - c_divs(i)%sf(m, k, l) - end do - end do - end do - end do - elseif (bc_x%end == -2) then - !$acc parallel loop collapse(4) default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - if (i == 1) then - c_divs(i)%sf(m + j, k, l) = & - -c_divs(i)%sf(m - (j - 1), k, l) - else - c_divs(i)%sf(m + j, k, l) = & - c_divs(i)%sf(m - (j - 1), k, l) - end if - end do - end do - end do - end do - else if (bc_x%end == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do l = 0, p - do k = 0, n - do j = 1, buff_size - c_divs(i)%sf(m + j, k, l) = & - c_divs(i)%sf(j - 1, k, l) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 1, 1) - end if - - if (n == 0) then - return - elseif (bc_y%beg <= -3) then !< ghost-cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - c_divs(i)%sf(l, -j, k) = & - c_divs(i)%sf(l, 0, k) - end do - end do - end do - end do - elseif (bc_y%beg == -2) then !< slip wall or reflective - !$acc parallel loop collapse(4) gang vector default(present) - do 
i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == 2) then - c_divs(i)%sf(l, -j, k) = & - -c_divs(i)%sf(l, j - 1, k) - else - c_divs(i)%sf(l, -j, k) = & - c_divs(i)%sf(l, j - 1, k) - end if - end do - end do - end do - end do - elseif (bc_y%beg == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - c_divs(i)%sf(l, -j, k) = & - c_divs(i)%sf(l, n - (j - 1), k) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, -1) - end if - - if (bc_y%end <= -3) then !< ghost-cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - c_divs(i)%sf(l, n + j, k) = & - c_divs(i)%sf(l, n, k) - end do - end do - end do - end do - elseif (bc_y%end == -2) then !< slip wall or reflective - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - if (i == 2) then - c_divs(i)%sf(l, n + j, k) = & - -c_divs(i)%sf(l, n - (j - 1), k) - else - c_divs(i)%sf(l, n + j, k) = & - c_divs(i)%sf(l, n - (j - 1), k) - end if - end do - end do - end do - end do - elseif (bc_y%end == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do k = 0, p - do j = 1, buff_size - do l = -buff_size, m + buff_size - c_divs(i)%sf(l, n + j, k) = & - c_divs(i)%sf(l, j - 1, k) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 2, 1) - end if - - if (p == 0) then - return - elseif (bc_z%beg <= -3) then !< ghost-cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - c_divs(i)%sf(k, l, -j) = & - c_divs(i)%sf(k, l, 0) - end do - end do - end do - end do - elseif (bc_z%beg == -2) then !< symmetry - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == 3) then - c_divs(i)%sf(k, l, -j) = & - -c_divs(i)%sf(k, l, j - 1) - else - c_divs(i)%sf(k, l, -j) = & - c_divs(i)%sf(k, l, j - 1) - end if - end do - end do - end do - end do - elseif (bc_z%beg == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - c_divs(i)%sf(k, l, -j) = & - c_divs(i)%sf(k, l, p - (j - 1)) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, -1) - end if - - if (bc_z%end <= -3) then !< ghost-cell extrapolation - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - c_divs(i)%sf(k, l, p + j) = & - c_divs(i)%sf(k, l, p) - end do - end do - end do - end do - elseif (bc_z%end == -2) then !< symmetry - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (i == 3) then - c_divs(i)%sf(k, l, p + j) = & - -c_divs(i)%sf(k, l, p - (j - 1)) - else - 
c_divs(i)%sf(k, l, p + j) = & - c_divs(i)%sf(k, l, p - (j - 1)) - end if - end do - end do - end do - end do - elseif (bc_z%end == -1) then - !$acc parallel loop collapse(4) gang vector default(present) - do i = 1, num_dims + 1 - do j = 1, buff_size - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - c_divs(i)%sf(k, l, p + j) = & - c_divs(i)%sf(k, l, j - 1) - end do - end do - end do - end do - else - call s_mpi_sendrecv_capilary_variables_buffers(c_divs, 3, 1) - end if + #:endfor end subroutine s_populate_capillary_buffers diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index db5d17656..fef58ba7b 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -22,6 +22,8 @@ module m_global_parameters use m_helper_basic !< Functions to compare floating point numbers + use m_global_parameters_common + #ifdef MFC_OpenACC use openacc #endif @@ -184,6 +186,8 @@ module m_global_parameters !> @name Boundary conditions (BC) in the x-, y- and z-directions, respectively !> @{ type(int_bounds_info) :: bc_x, bc_y, bc_z + integer :: num_bc_patches + type(bc_patch_parameters) :: patch_bc(num_bc_patches_max) !> @} type(bounds_info) :: x_domain, y_domain, z_domain @@ -191,9 +195,12 @@ module m_global_parameters logical :: file_per_process !< shared file or not when using parallel io integer :: precision !< Precision of output files - integer, allocatable, dimension(:) :: proc_coords !< + integer, dimension(1:3) :: proc_coords !< !! Processor coordinates in MPI_CART_COMM + integer, dimension(1:3) :: proc_nums !< + !! Processor dimensions in MPI_CART_COMM + integer, allocatable, dimension(:) :: start_idx !< !! Starting cell-center index of local processor in global grid @@ -295,9 +302,9 @@ module m_global_parameters !! in the flow. These include the stiffened gas equation of state parameters, !! the Reynolds numbers and the Weber numbers. - !$acc declare create(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) - !$acc declare create(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) - !$acc declare create(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc declare create(bc_x%vel_beg, bc_x%vel_end) + !$acc declare create(bc_y%vel_beg, bc_y%vel_end) + !$acc declare create(bc_z%vel_beg, bc_z%vel_end) ! ========================================================================== @@ -535,16 +542,16 @@ contains bc_z%beg = dflt_int; bc_z%end = dflt_int #:for DIM in ['x', 'y', 'z'] - #:for DIR in [1, 2, 3] - bc_${DIM}$%vb${DIR}$ = 0d0 - bc_${DIM}$%ve${DIR}$ = 0d0 - #:endfor + bc_${DIM}$%vel_beg = 0d0 + bc_${DIM}$%vel_end = 0d0 #:endfor x_domain%beg = dflt_int; x_domain%end = dflt_int y_domain%beg = dflt_int; y_domain%end = dflt_int z_domain%beg = dflt_int; z_domain%end = dflt_int + call s_bc_assign_default_values_to_user_inputs(num_bc_patches, patch_bc) + ! Fluids physical parameters do i = 1, num_fluids_max fluid_pp(i)%gamma = dflt_real @@ -1130,8 +1137,6 @@ contains num_dims = 1 + min(1, n) + min(1, p) #:endif - allocate (proc_coords(1:num_dims)) - if (parallel_io .neqv. .true.) 
return #ifdef MFC_MPI @@ -1166,7 +1171,6 @@ contains @:DEALLOCATE(Re_idx) end if - deallocate (proc_coords) if (parallel_io) then deallocate (start_idx) do i = 1, sys_size diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 8421fda52..a8b1043f9 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -19,6 +19,10 @@ module m_ibm use m_helper + use m_constants + + use m_boundary_conditions_common + ! ========================================================================== implicit none @@ -42,7 +46,6 @@ module m_ibm type(ghost_point), dimension(:), allocatable :: inner_points !$acc declare create(ghost_points, inner_points) - integer :: gp_layers !< Number of ghost point layers integer :: num_gps !< Number of ghost points integer :: num_inner_gps !< Number of ghost points !$acc declare create(gp_layers, num_gps, num_inner_gps) @@ -52,8 +55,6 @@ contains !> Allocates memory for the variables in the IBM module subroutine s_initialize_ibm_module() - gp_layers = 3 - if (p > 0) then @:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, & -gp_layers:n+gp_layers, -gp_layers:p+gp_layers)) @@ -74,7 +75,7 @@ contains @:ACC_SETUP_SFs(levelset) ! @:ALLOCATE(ib_markers%sf(0:m, 0:n, 0:p)) - !$acc enter data copyin(gp_layers, num_gps, num_inner_gps) + !$acc enter data copyin(num_gps, num_inner_gps) end subroutine s_initialize_ibm_module @@ -87,7 +88,7 @@ contains !$acc update device(ib_markers%sf) ! Get neighboring IB variables from other processors - call s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) + call s_mpi_sendrecv_ib_buffers(ib_markers, bc_id_sfs) !$acc update host(ib_markers%sf) @@ -212,7 +213,7 @@ contains ! Calculate velocity of ghost cell if (gp%slip) then - norm = gp%ip_loc - physical_loc ! + norm = levelset_norm%sf(gp%loc(1), gp%loc(2), gp%loc(3), gp%ib_patch_id, :) buf = sqrt(sum(norm**2)) norm = norm/buf vel_norm_IP = sum(vel_IP*norm)*norm @@ -447,6 +448,7 @@ contains integer :: i, j, k, l, q !< Iterator variables num_gps = 0 + num_inner_gps = 0 do i = 0, m do j = 0, n diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 8b1ea7749..94dbb1364 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -30,19 +30,12 @@ module m_mpi_proxy use m_nvtx use ieee_arithmetic - ! ========================================================================== - implicit none + use m_constants - real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_send !< - !! This variable is utilized to pack and send the buffer of the cell-average - !! conservative variables, for a single computational domain boundary at the - !! time, to the relevant neighboring processor. + ! ========================================================================== - real(kind(0d0)), private, allocatable, dimension(:), target :: q_cons_buff_recv !< - !! q_cons_buff_recv is utilized to receive and unpack the buffer of the cell- - !! average conservative variables, for a single computational domain boundary - !! at the time, from the relevant neighboring processor. + implicit none real(kind(0d0)), private, allocatable, dimension(:), target :: c_divs_buff_send !< !! c_divs_buff_send is utilized to send and unpack the buffer of the cell- @@ -64,15 +57,13 @@ module m_mpi_proxy !! immersed boundary markers, for a single computational domain boundary !! at the time, from the relevant neighboring processor. 
- !$acc declare create(q_cons_buff_send, q_cons_buff_recv) - !$acc declare create( ib_buff_send, ib_buff_recv) + !$acc declare create(ib_buff_send, ib_buff_recv) !$acc declare create(c_divs_buff_send, c_divs_buff_recv) !> @name Generic flags used to identify and report MPI errors !> @{ - integer, private :: err_code, ierr, v_size + integer, private :: err_code, ierr !> @} - !$acc declare create(v_size) !real :: s_time, e_time !real :: compress_time, mpi_time, decompress_time @@ -90,70 +81,45 @@ contains #ifdef MFC_MPI - ! Allocating q_cons_buff_send/recv and ib_buff_send/recv. Please note that - ! for the sake of simplicity, both variables are provided sufficient - ! storage to hold the largest buffer in the computational domain. - - if (qbmm .and. .not. polytropic) then - if (n > 0) then - if (p > 0) then - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) - else - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4)* & - & (max(m, n) + 2*buff_size + 1))) - end if - else - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*(sys_size + 2*nb*4))) - end if - - @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1))) - - v_size = sys_size + 2*nb*4 - else + if (surface_tension) then + nVars = num_dims + 1 if (n > 0) then if (p > 0) then - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* & + @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & & (p + 2*buff_size + 1)/ & & (min(m, n, p) + 2*buff_size + 1))) else - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size* & + @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & & (max(m, n) + 2*buff_size + 1))) end if else - @:ALLOCATE(q_cons_buff_send(0:-1 + buff_size*sys_size)) + @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1))) end if - @:ALLOCATE(q_cons_buff_recv(0:ubound(q_cons_buff_send, 1))) - - v_size = sys_size + @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1))) end if + !$acc update device(nVars) - if (surface_tension) then - nVars = num_dims + 1 + if (ib) then if (n > 0) then if (p > 0) then - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (min(m, n, p) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & + & (m + 2*gp_layers + 1)* & + & (n + 2*gp_layers + 1)* & + & (p + 2*gp_layers + 1)/ & + & (min(m, n, p) + 2*gp_layers + 1))) else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1)* & - & (max(m, n) + 2*buff_size + 1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & + & (max(m, n) + 2*gp_layers + 1))) end if else - @:ALLOCATE(c_divs_buff_send(0:-1 + buff_size*(num_dims+1))) + @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) end if - @:ALLOCATE(c_divs_buff_recv(0:ubound(c_divs_buff_send, 1))) + @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) end if - !$acc update device(v_size, nVars) #endif @@ -214,9 +180,6 @@ contains #:for VAR in [ 'dt','weno_eps','teno_CT','pref','rhoref','R0ref','Web','Ca', 'sigma', & & 'Re_inv', 'poly_sigma', 'palpha_eps', 'ptgalpha_eps', 'pi_fac', & - & 'bc_x%vb1','bc_x%vb2','bc_x%vb3','bc_x%ve1','bc_x%ve2','bc_x%ve2', & - & 'bc_y%vb1','bc_y%vb2','bc_y%vb3','bc_y%ve1','bc_y%ve2','bc_y%ve3', & - & 'bc_z%vb1','bc_z%vb2','bc_z%vb3','bc_z%ve1','bc_z%ve2','bc_z%ve3', & & 'bc_x%pres_in','bc_x%pres_out','bc_y%pres_in','bc_y%pres_out', 
'bc_z%pres_in','bc_z%pres_out', & & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & & 'z_domain%beg', 'z_domain%end', 't_stop', 't_save', 'cfl_target'] @@ -294,812 +257,146 @@ contains end subroutine s_mpi_bcast_user_inputs - !> The purpose of this procedure is to optimally decompose - !! the computational domain among the available processors. - !! This is performed by attempting to award each processor, - !! in each of the coordinate directions, approximately the - !! same number of cells, and then recomputing the affected - !! global parameters. - subroutine s_mpi_decompose_computational_domain - -#ifdef MFC_MPI - - integer :: num_procs_x, num_procs_y, num_procs_z !< - !! Optimal number of processors in the x-, y- and z-directions - - real(kind(0d0)) :: tmp_num_procs_x, tmp_num_procs_y, tmp_num_procs_z !< - !! Non-optimal number of processors in the x-, y- and z-directions - - real(kind(0d0)) :: fct_min !< - !! Processor factorization (fct) minimization parameter - - integer :: MPI_COMM_CART !< - !! Cartesian processor topology communicator - - integer :: rem_cells !< - !! Remaining number of cells, in a particular coordinate direction, - !! after the majority is divided up among the available processors - - integer :: i, j !< Generic loop iterators - - if (num_procs == 1 .and. parallel_io) then - do i = 1, num_dims - start_idx(i) = 0 - end do - return - end if - - ! 3D Cartesian Processor Topology ================================== - if (n > 0) then - - if (p > 0) then - - if (cyl_coord .and. p > 0) then - ! Implement pencil processor blocking if using cylindrical coordinates so - ! that all cells in azimuthal direction are stored on a single processor. - ! This is necessary for efficient application of Fourier filter near axis. - - ! Initial values of the processor factorization optimization - num_procs_x = 1 - num_procs_y = num_procs - num_procs_z = 1 - ierr = -1 - - ! Computing minimization variable for these initial values - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Searching for optimal computational domain distribution - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if - - end if - - end do + subroutine s_mpi_sendrecv_ib_buffers(ib_markers, bc_id_sfs) - else - - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = 1 - num_procs_z = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - tmp_num_procs_z = num_procs_z - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + 10d0*abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - do j = 1, num_procs/i - - if (mod(num_procs/i, j) == 0 & - .and. 
& - (n + 1)/j >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = j - tmp_num_procs_z = num_procs/(i*j) - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) & - .and. & - (p + 1)/tmp_num_procs_z & - >= & - num_stcls_min*weno_order) & - then - - num_procs_x = i - num_procs_y = j - num_procs_z = num_procs/(i*j) - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - + abs((n + 1)/tmp_num_procs_y & - - (p + 1)/tmp_num_procs_z) - ierr = 0 + type(integer_field), intent(inout) :: ib_markers + type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs - end if + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() - end if + integer :: buffer_counts(1:3) + integer :: iter_dir, iter_loc + integer, pointer :: p_send, p_recv - end do +#ifdef MFC_MPI - end if + buffer_counts = (/ & + gp_layers*(n + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(p + 1), & + gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1) & + /) - end do + do iter_dir = 1, num_dims - end if + do iter_loc = -1, 1, 2 - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n, p and '// & - 'weno_order. Exiting ...') - end if + call nvtxStartRange("RHS-COMM-PACKBUF") - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 3, (/num_procs_x, & - num_procs_y, num_procs_z/), & - (/.true., .true., .true./), & - .false., MPI_COMM_CART, ierr) + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="-iter_loc", pack_v_size='1', thickness='gp_layers', inner_loops=[("i", 1, 1)]) + ib_buff_send(pack_idr) = ib_markers%sf(sx, sy, sz) + #:endblock - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 3, & - proc_coords, ierr) - ! END: 3D Cartesian Processor Topology ============================= + call nvtxEndRange ! Packbuf - ! Global Parameters for z-direction ================================ + p_send => ib_buff_send(0) + p_recv => ib_buff_recv(0) + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) - ! Number of remaining cells - rem_cells = mod(p + 1, num_procs_z) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(ib_buff_send) + call nvtxEndRange ! Dev2Host - ! Optimal number of cells per processor - p = (p + 1)/num_procs_z - 1 + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(3) == i - 1) then - p = p + 1; exit - end if - end do - - ! Boundary condition at the beginning - if (proc_coords(3) > 0 .or. bc_z%beg == -1) then - proc_coords(3) = proc_coords(3) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%beg, ierr) - proc_coords(3) = proc_coords(3) + 1 - end if + call MPI_SENDRECV( & + p_send, buffer_counts(iter_dir), MPI_INTEGER, neighbor_procs(iter_dir, -iter_loc), 0, & + p_recv, buffer_counts(iter_dir), MPI_INTEGER, neighbor_procs(iter_dir, +iter_loc), 0, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - ! Boundary condition at the end - if (proc_coords(3) < num_procs_z - 1 .or. 
bc_z%end == -1) then - proc_coords(3) = proc_coords(3) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_z%end, ierr) - proc_coords(3) = proc_coords(3) - 1 - end if + call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA - if (parallel_io) then - if (proc_coords(3) < rem_cells) then - start_idx(3) = (p + 1)*proc_coords(3) - else - start_idx(3) = (p + 1)*proc_coords(3) + rem_cells + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(ib_buff_recv) + call nvtxEndRange ! Host2Dev + #:endif end if - end if - ! ================================================================== - - ! 2D Cartesian Processor Topology ================================== - else + #:endfor - ! Initial estimate of optimal processor topology - num_procs_x = 1 - num_procs_y = num_procs - ierr = -1 - - ! Benchmarking the quality of this initial guess - tmp_num_procs_x = num_procs_x - tmp_num_procs_y = num_procs_y - fct_min = 10d0*abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - - ! Optimization of the initial processor topology - do i = 1, num_procs - - if (mod(num_procs, i) == 0 & - .and. & - (m + 1)/i >= num_stcls_min*weno_order) then - - tmp_num_procs_x = i - tmp_num_procs_y = num_procs/i - - if (fct_min >= abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) & - .and. & - (n + 1)/tmp_num_procs_y & - >= & - num_stcls_min*weno_order) then - - num_procs_x = i - num_procs_y = num_procs/i - fct_min = abs((m + 1)/tmp_num_procs_x & - - (n + 1)/tmp_num_procs_y) - ierr = 0 - - end if + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="iter_dir", loc="iter_loc", pack_v_size='1', thickness='gp_layers', inner_loops=[("i", 1, 1)]) + if (bc_id_sfs(iter_dir, iter_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then + ib_markers%sf(x, y, z) = ib_buff_recv(pack_idr) end if + #:endblock - end do - - ! Verifying that a valid decomposition of the computational - ! domain has been established. If not, the simulation exits. - if (proc_rank == 0 .and. ierr == -1) then - call s_mpi_abort('Unsupported combination of values '// & - 'of num_procs, m, n and '// & - 'weno_order. Exiting ...') - end if - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 2, (/num_procs_x, & - num_procs_y/), (/.true., & - .true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 2, & - proc_coords, ierr) - - end if - ! END: 2D Cartesian Processor Topology ============================= - - ! Global Parameters for y-direction ================================ - - ! Number of remaining cells - rem_cells = mod(n + 1, num_procs_y) - - ! Optimal number of cells per processor - n = (n + 1)/num_procs_y - 1 + call nvtxEndRange ! Unpackbuf - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(2) == i - 1) then - n = n + 1; exit - end if end do - ! Boundary condition at the beginning - if (proc_coords(2) > 0 .or. bc_y%beg == -1) then - proc_coords(2) = proc_coords(2) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%beg, ierr) - proc_coords(2) = proc_coords(2) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(2) < num_procs_y - 1 .or. 
bc_y%end == -1) then - proc_coords(2) = proc_coords(2) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, & - bc_y%end, ierr) - proc_coords(2) = proc_coords(2) - 1 - end if - - if (parallel_io) then - if (proc_coords(2) < rem_cells) then - start_idx(2) = (n + 1)*proc_coords(2) - else - start_idx(2) = (n + 1)*proc_coords(2) + rem_cells - end if - end if - - ! ================================================================== - - ! 1D Cartesian Processor Topology ================================== - else - - ! Optimal processor topology - num_procs_x = num_procs - - ! Creating new communicator using the Cartesian topology - call MPI_CART_CREATE(MPI_COMM_WORLD, 1, (/num_procs_x/), & - (/.true./), .false., MPI_COMM_CART, & - ierr) - - ! Finding the Cartesian coordinates of the local process - call MPI_CART_COORDS(MPI_COMM_CART, proc_rank, 1, & - proc_coords, ierr) - - end if - ! ================================================================== - - ! Global Parameters for x-direction ================================ - - ! Number of remaining cells - rem_cells = mod(m + 1, num_procs_x) - - ! Optimal number of cells per processor - m = (m + 1)/num_procs_x - 1 - - ! Distributing the remaining cells - do i = 1, rem_cells - if (proc_coords(1) == i - 1) then - m = m + 1; exit - end if end do - ! Boundary condition at the beginning - if (proc_coords(1) > 0 .or. bc_x%beg == -1) then - proc_coords(1) = proc_coords(1) - 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%beg, ierr) - proc_coords(1) = proc_coords(1) + 1 - end if - - ! Boundary condition at the end - if (proc_coords(1) < num_procs_x - 1 .or. bc_x%end == -1) then - proc_coords(1) = proc_coords(1) + 1 - call MPI_CART_RANK(MPI_COMM_CART, proc_coords, bc_x%end, ierr) - proc_coords(1) = proc_coords(1) - 1 - end if - - if (parallel_io) then - if (proc_coords(1) < rem_cells) then - start_idx(1) = (m + 1)*proc_coords(1) - else - start_idx(1) = (m + 1)*proc_coords(1) + rem_cells - end if - end if - ! ================================================================== - #endif - end subroutine s_mpi_decompose_computational_domain - - !> The goal of this procedure is to populate the buffers of - !! the grid variables by communicating with the neighboring - !! processors. Note that only the buffers of the cell-width - !! distributions are handled in such a way. This is because - !! the buffers of cell-boundary locations may be calculated - !! directly from those of the cell-width distributions. - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_grid_variables_buffers(mpi_dir, pbc_loc) - - integer, intent(in) :: mpi_dir - integer, intent(in) :: pbc_loc - - integer :: dst_proc(1:3) - -#ifdef MFC_MPI - - ! MPI Communication in x-direction ================================= - if (mpi_dir == 1) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - dx(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! 
Send/receive buffer to/from bc_x%beg/bc_x%beg - call MPI_SENDRECV( & - dx(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - dx(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_x%beg/bc_x%end - call MPI_SENDRECV( & - dx(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%beg, 1, & - dx(m + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_x%end/bc_x%end - call MPI_SENDRECV( & - dx(m - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 0, & - dx(m + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in x-direction ============================ - - ! MPI Communication in y-direction ================================= - elseif (mpi_dir == 2) then - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_y%end/bc_y%beg - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 0, & - dy(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_y%beg/bc_y%beg - call MPI_SENDRECV( & - dy(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 1, & - dy(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_y%beg/bc_y%end - call MPI_SENDRECV( & - dy(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%beg, 1, & - dy(n + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the end only - - ! Send/receive buffer to/from bc_y%end/bc_y%end - call MPI_SENDRECV( & - dy(n - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 0, & - dy(n + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if - ! END: MPI Communication in y-direction ============================ - - ! MPI Communication in z-direction ================================= - else - - if (pbc_loc == -1) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! Send/receive buffer to/from bc_z%end/bc_z%beg - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - dz(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! PBC at the beginning only - - ! Send/receive buffer to/from bc_z%beg/bc_z%beg - call MPI_SENDRECV( & - dz(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - dz(-buff_size), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - else ! PBC at the end - - if (bc_z%beg >= 0) then ! PBC at the end and beginning - - ! Send/receive buffer to/from bc_z%beg/bc_z%end - call MPI_SENDRECV( & - dz(0), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%beg, 1, & - dz(p + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - else ! 
PBC at the end only - - ! Send/receive buffer to/from bc_z%end/bc_z%end - call MPI_SENDRECV( & - dz(p - buff_size + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 0, & - dz(p + 1), buff_size, & - MPI_DOUBLE_PRECISION, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - end if - - end if + end subroutine s_mpi_sendrecv_ib_buffers - end if - ! END: MPI Communication in z-direction ============================ + subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, bc_id_sfs, mpi_dir, pbc_loc) -#endif + type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf + type(t_bc_id_sf), dimension(1:3, -1:1), intent(in) :: bc_id_sfs - end subroutine s_mpi_sendrecv_grid_variables_buffers - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - !! @param q_cons_vf Cell-average conservative variables - !! @param mpi_dir MPI communication coordinate direction - !! @param pbc_loc Processor boundary condition (PBC) location - subroutine s_mpi_sendrecv_variables_buffers(q_cons_vf, & - pb, mv, & - mpi_dir, & - pbc_loc) - - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - real(kind(0d0)), dimension(startx:, starty:, startz:, 1:, 1:), intent(inout) :: pb, mv integer, intent(in) :: mpi_dir, pbc_loc - integer :: i, j, k, l, r, q !< Generic loop iterators - integer :: buffer_counts(1:3), buffer_count type(int_bounds_info) :: boundary_conditions(1:3) integer :: beg_end(1:2), grid_dims(1:3) integer :: dst_proc, src_proc, recv_tag, send_tag - logical :: beg_end_geq_0 - integer :: pack_offset, unpack_offset real(kind(0d0)), pointer :: p_send, p_recv -#ifdef MFC_MPI - - call nvtxStartRange("RHS-COMM-PACKBUF") - !$acc update device(v_size) - - if (qbmm .and. .not. polytropic) then - buffer_counts = (/ & - buff_size*(sys_size + 2*nb*4)*(n + 1)*(p + 1), & - buff_size*(sys_size + 2*nb*4)*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - else - buffer_counts = (/ & - buff_size*sys_size*(n + 1)*(p + 1), & - buff_size*sys_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - end if - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. 
f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) +#ifdef MFC_MPI - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if + nVars = num_dims + 1 + !$acc update device(nVars) - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if + buffer_counts = (/ & + buff_size*nVars*(n + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & + buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, sys_size - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) - q_cons_buff_send(r) = q_cons_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - q_cons_buff_send(r) = pb(j + pack_offset, k, l, i - sys_size, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = sys_size + 1, sys_size + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*(k + (n + 1)*l)) - q_cons_buff_send(r) = mv(j + pack_offset, k, l, i - sys_size, q) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - q_cons_buff_send(r) = q_cons_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - if (qbmm .and. .not. 
polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - q_cons_buff_send(r) = pb(j, k + pack_offset, l, i - sys_size, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - q_cons_buff_send(r) = mv(j, k + pack_offset, l, i - sys_size, q) - end do - end do - end do - end do - end do - end if - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_cons_buff_send(r) = q_cons_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_cons_buff_send(r) = pb(j, k, l + pack_offset, i - sys_size, q) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - q_cons_buff_send(r) = mv(j, k, l + pack_offset, i - sys_size, q) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="-pbc_loc", inner_loops=[("i", 1, "nVars")], pack_v_size="nVars") + c_divs_buff_send(pack_idr) = c_divs_vf(i)%sf(sx, sy, sz) + #:endblock - ! Send/Recv + p_send => c_divs_buff_send(0) + p_recv => c_divs_buff_recv(0) #:for rdma_mpi in [False, True] if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - p_send => q_cons_buff_send(0) - p_recv => q_cons_buff_recv(0) #:if rdma_mpi !$acc data attach(p_send, p_recv) !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") #:else call nvtxStartRange("RHS-COMM-DEV2HOST") - !$acc update host(q_cons_buff_send, ib_buff_send) + !$acc update host(c_divs_buff_send) call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") #:endif call MPI_SENDRECV( & - p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, & - p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, & + p_send, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, -pbc_loc), 0, & + p_recv, buffer_counts(mpi_dir), MPI_DOUBLE_PRECISION, neighbor_procs(mpi_dir, +pbc_loc), 0, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + call nvtxEndRange ! 
RHS-MPI-SENDRECV-(NO)-RDMA #:if rdma_mpi @@ -1108,1243 +405,17 @@ contains !$acc wait #:else call nvtxStartRange("RHS-COMM-HOST2DEV") - !$acc update device(q_cons_buff_recv) + !$acc update device(c_divs_buff_recv) call nvtxEndRange #:endif end if #:endfor - ! Unpack Received Buffer - call nvtxStartRange("RHS-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, sys_size - r = (i - 1) + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - q_cons_vf(i)%sf(j + unpack_offset, k, l) = q_cons_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - pb(j + unpack_offset, k, l, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = sys_size + 1, sys_size + 4 - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - (j + buff_size*((k + 1) + (n + 1)*l)) - mv(j + unpack_offset, k, l, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - end if - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - q_cons_vf(i)%sf(j, k + unpack_offset, l) = q_cons_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - pb(j, k + unpack_offset, l, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - mv(j, k + unpack_offset, l, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - end if - #:else - ! 
Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, sys_size - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - q_cons_vf(i)%sf(j, k, l + unpack_offset) = q_cons_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(q_cons_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - if (qbmm .and. .not. polytropic) then - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - pb(j, k, l + unpack_offset, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - - !$acc parallel loop collapse(5) gang vector default(present) private(r) - do i = sys_size + 1, sys_size + 4 - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - do q = 1, nb - r = (i - 1) + (q - 1)*4 + nb*4 + v_size* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - mv(j, k, l + unpack_offset, i - sys_size, q) = q_cons_buff_recv(r) - end do - end do - end do - end do - end do - end if - #:endif - end if - #:endfor - call nvtxEndRange - -#endif - - end subroutine s_mpi_sendrecv_variables_buffers - - !> The goal of this procedure is to populate the buffers of - !! the cell-average conservative variables by communicating - !! with the neighboring processors. - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, gp_layers) - - type(integer_field), intent(inout) :: ib_markers - integer, intent(in) :: gp_layers - - integer :: i, j, k, l, r !< Generic loop iterators - integer, pointer, dimension(:) :: p_i_send, p_i_recv - -#ifdef MFC_MPI - - if (n > 0) then - if (p > 0) then - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers * & - & (m + 2*gp_layers + 1)* & - & (n + 2*gp_layers + 1)* & - & (p + 2*gp_layers + 1)/ & - & (min(m, n, p) + 2*gp_layers + 1))) - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers* & - & (max(m, n) + 2*gp_layers + 1))) + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir="mpi_dir", loc="pbc_loc", inner_loops=[("i", 1, "nVars")], pack_v_size="nVars") + if (bc_id_sfs(mpi_dir, pbc_loc)%sf(exlhs, eylhs, ezlhs)%type >= 0) then + c_divs_vf(i)%sf(x, y, z) = c_divs_buff_recv(pack_idr) end if - else - @:ALLOCATE(ib_buff_send(0:-1 + gp_layers)) - end if - @:ALLOCATE(ib_buff_recv(0:ubound(ib_buff_send, 1))) - - !nCalls_time = nCalls_time + 1 - - ! MPI Communication in x-direction ================================= - if (bc_x%beg >= 0) then ! PBC at the beginning - - if (bc_x%end >= 0) then ! PBC at the beginning and end - - ! 
Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send, ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_x%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -gp_layers, -1 - r = (j + gp_layers*((k + 1) + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_x%end >= 0) then ! PBC at the end - - if (bc_x%beg >= 0) then ! PBC at the end and beginning - - !$acc parallel loop collapse(3) gang vector default(present) private(r) - ! 
Packing buffer to be sent to bc_x%beg - do l = 0, p - do k = 0, n - do j = 0, gp_layers - 1 - r = (j + gp_layers*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%beg, 1, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m - gp_layers + 1, m - r = ((j - m - 1) + gp_layers*((k + 1) + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - p_i_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 0, & - ib_buff_recv(0), & - gp_layers*(n + 1)*(p + 1), & - MPI_INTEGER, bc_x%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if - - ! Unpacking buffer received from bc_x%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = m + 1, m + gp_layers - r = ((j - m - 1) + gp_layers*(k + (n + 1)*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in x-direction ============================ - - ! MPI Communication in y-direction ================================= - - if (bc_y%beg >= 0) then ! PBC at the beginning - - if (bc_y%end >= 0) then ! PBC at the beginning and end - - ! 
Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = -gp_layers, -1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_y%end >= 0) then ! PBC at the end - - if (bc_y%beg >= 0) then ! PBC at the end and beginning - - ! 
Packing buffer to be sent to bc_y%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = 0, gp_layers - 1 - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - (k + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n - gp_layers + 1, n - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n + gp_layers - 1) + gp_layers*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(p + 1), & - MPI_INTEGER, bc_y%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received form bc_y%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, p - do k = n + 1, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k - n - 1) + gp_layers*l)) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - ! END: MPI Communication in y-direction ============================ - - ! MPI Communication in z-direction ================================= - if (bc_z%beg >= 0) then ! PBC at the beginning - - if (bc_z%end >= 0) then ! PBC at the beginning and end - - ! 
Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the beginning only - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 0, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = -gp_layers, -1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l + gp_layers))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - if (bc_z%end >= 0) then ! PBC at the end - - if (bc_z%beg >= 0) then ! 
PBC at the end and beginning - - ! Packing buffer to be sent to bc_z%beg - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = 0, gp_layers - 1 - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%beg, 1, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - else ! PBC at the end only - - ! Packing buffer to be sent to bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p - gp_layers + 1, p - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p + gp_layers - 1))) - ib_buff_send(r) = ib_markers%sf(j, k, l) - end do - end do - end do - - !call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#if defined(MFC_OpenACC) - if (rdma_mpi) then - p_i_send => ib_buff_send - p_i_recv => ib_buff_recv - - !$acc data attach(p_i_send, p_i_recv) - !$acc host_data use_device(p_i_send, p_i_recv) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - p_i_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - p_i_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - !$acc end host_data - !$acc end data - !$acc wait - else -#endif - !$acc update host(ib_buff_send) - - ! Send/receive buffer to/from bc_x%end/bc_x%beg - call MPI_SENDRECV( & - ib_buff_send(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 0, & - ib_buff_recv(0), & - gp_layers*(m + 2*gp_layers + 1)*(n + 2*gp_layers + 1), & - MPI_INTEGER, bc_z%end, 1, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - -#if defined(MFC_OpenACC) - end if -#endif - - end if - -#if defined(MFC_OpenACC) - if (rdma_mpi .eqv. .false.) then - !$acc update device(ib_buff_recv) - end if -#endif - - ! Unpacking buffer received from bc_z%end - !$acc parallel loop collapse(3) gang vector default(present) private(r) - do l = p + 1, p + gp_layers - do k = -gp_layers, n + gp_layers - do j = -gp_layers, m + gp_layers - r = ((j + gp_layers) + (m + 2*gp_layers + 1)* & - ((k + gp_layers) + (n + 2*gp_layers + 1)* & - (l - p - 1))) - ib_markers%sf(j, k, l) = ib_buff_recv(r) - end do - end do - end do - - end if - - ! 
END: MPI Communication in z-direction ============================ - -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - - subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) - - type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0 - - integer :: pack_offset, unpack_offset - real(kind(0d0)), pointer :: p_send, p_recv - -#ifdef MFC_MPI - - nVars = num_dims + 1 - !$acc update device(nVars) - - buffer_counts = (/ & - buff_size*nVars*(n + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(p + 1), & - buff_size*nVars*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - do i = 1, nVars - r = (i - 1) + nVars*(j + buff_size*(k + (n + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j + pack_offset, k, l) - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k + pack_offset, l) - end do - end do - end do - end do - - #:else - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - c_divs_buff_send(r) = c_divs_vf(i)%sf(j, k, l + pack_offset) - end do - end do - end do - end do - #:endif - end if - #:endfor - - ! Send/Recv - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' 
if rdma_mpi else '.false.'}$) then - p_send => c_divs_buff_send(0) - p_recv => c_divs_buff_recv(0) - - #:if rdma_mpi - !$acc data attach(p_send, p_recv) - !$acc host_data use_device(p_send, p_recv) - #:else - !$acc update host(c_divs_buff_send) - #:endif - - call MPI_SENDRECV( & - p_send, buffer_count, MPI_DOUBLE_PRECISION, dst_proc, send_tag, & - p_recv, buffer_count, MPI_DOUBLE_PRECISION, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - - #:if rdma_mpi - !$acc end host_data - !$acc end data - !$acc wait - #:else - !$acc update device(c_divs_buff_recv) - #:endif - end if - #:endfor - - ! Unpack Received Buffer - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - do i = 1, nVars - r = (i - 1) + nVars* & - (j + buff_size*((k + 1) + (n + 1)*l)) - c_divs_vf(i)%sf(j + unpack_offset, k, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:elif mpi_dir == 2 - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - c_divs_vf(i)%sf(j, k + unpack_offset, l) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:else - ! Unpacking buffer from bc_z%beg - !$acc parallel loop collapse(4) gang vector default(present) private(r) - do i = 1, nVars - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = (i - 1) + nVars* & - ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - c_divs_vf(i)%sf(j, k, l + unpack_offset) = c_divs_buff_recv(r) -#if defined(__INTEL_COMPILER) - if (ieee_is_nan(c_divs_vf(i)%sf(j, k, l))) then - print *, "Error", j, k, l, i - error stop "NaN(s) in recv" - end if -#endif - end do - end do - end do - end do - - #:endif - end if - #:endfor + #:endblock #endif @@ -2363,8 +434,6 @@ contains #ifdef MFC_MPI - ! Deallocating q_cons_buff_send and q_cons_buff_recv - @:DEALLOCATE(q_cons_buff_send, q_cons_buff_recv) if (ib) then @:DEALLOCATE(ib_buff_send, ib_buff_recv) end if diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 389847dd7..1d6c20f3f 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -670,7 +670,7 @@ contains call nvtxEndRange call nvtxStartRange("RHS-COMMUNICATION") - call s_populate_variables_buffers(q_prim_qp%vf, pb, mv) + call s_populate_prim_buffers(q_prim_qp%vf, pb, mv) call nvtxEndRange if (cfl_dt) then diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 4ecb6127c..cabc2f5de 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -61,6 +61,8 @@ module m_start_up use m_helper_basic !< Functions to compare floating point numbers + use m_boundary_conditions_common + #ifdef MFC_OpenACC use openacc #endif @@ -78,6 +80,8 @@ module m_start_up use m_surface_tension use m_body_forces + + use m_boundary_conditions ! 
========================================================================== implicit none @@ -87,7 +91,7 @@ module m_start_up s_read_data_files, & s_read_serial_data_files, & s_read_parallel_data_files, & - s_populate_grid_variables_buffers, & + s_mpi_sendrecv_grid_spacing_buffers, & s_initialize_internal_energy_equations, & s_initialize_modules, s_initialize_gpu_vars, & s_initialize_mpi_domain, s_finalize_modules, & @@ -141,6 +145,7 @@ contains teno_CT, mp_weno, weno_avg, & riemann_solver, low_Mach, wave_speeds, avg_state, & bc_x, bc_y, bc_z, & + num_bc_patches, patch_bc, & x_domain, y_domain, z_domain, & hypoelasticity, & ib, num_ibs, patch_ib, & @@ -252,6 +257,8 @@ contains integer :: i, r !< Generic loop iterator + integer :: iter_dir, iter_loc + ! Confirming that the directory from which the initial condition and ! the grid data files are to be read in exists and exiting otherwise if (cfl_dt) then @@ -269,6 +276,8 @@ contains call s_mpi_abort(trim(file_path)//' is missing. Exiting ...') end if + call s_read_boundary_condition_files(t_step_dir) + ! Cell-boundary Locations in x-direction =========================== file_path = trim(t_step_dir)//'/x_cb.dat' @@ -502,6 +511,7 @@ contains logical :: file_exist character(len=10) :: t_step_start_string + character(len=path_len) :: step_dirpath integer :: i, j @@ -509,8 +519,10 @@ contains allocate (y_cb_glb(-1:n_glb)) allocate (z_cb_glb(-1:p_glb)) + step_dirpath = trim(case_dir)//'/restart_data' + ! Read in cell boundary locations in x-direction - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//'x_cb.dat' + file_loc = trim(step_dirpath)//trim(mpiiofs)//'x_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -541,7 +553,7 @@ contains if (n > 0) then ! Read in cell boundary locations in y-direction - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//'y_cb.dat' + file_loc = trim(step_dirpath)//trim(mpiiofs)//'y_cb.dat' inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -584,6 +596,8 @@ contains end if end if + call s_read_boundary_condition_files(trim(step_dirpath)) + if (file_per_process) then if (cfl_dt) then call s_int_to_str(n_start, t_step_start_string) @@ -592,7 +606,7 @@ contains call s_int_to_str(t_step_start, t_step_start_string) write (file_loc, '(I0,A1,I7.7,A)') t_step_start, '_', proc_rank, '.dat' end if - file_loc = trim(case_dir)//'/restart_data/lustre_'//trim(t_step_start_string)//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//'/lustre_'//trim(t_step_start_string)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -653,7 +667,7 @@ contains if (ib) then ! Read IB Markers write (file_loc, '(A)') 'ib.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -673,7 +687,7 @@ contains ! Read Levelset write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -693,7 +707,7 @@ contains ! 
Read Levelset Norm write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -724,7 +738,7 @@ contains else write (file_loc, '(I0,A)') t_step_start, '.dat' end if - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -803,7 +817,7 @@ contains ! Read IB Markers write (file_loc, '(A)') 'ib.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -823,7 +837,7 @@ contains ! Read Levelset write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -834,7 +848,7 @@ contains call MPI_FILE_SET_VIEW(ifile, disp, MPI_DOUBLE_PRECISION, MPI_IO_levelset_DATA%view, & 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size, & + call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, & MPI_DOUBLE_PRECISION, status, ierr) else @@ -843,7 +857,7 @@ contains ! Read Levelset Norm write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -880,7 +894,7 @@ contains allocate (airfoil_grid_l(1:Np)) write (file_loc, '(A)') 'airfoil_l.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -897,7 +911,7 @@ contains end if write (file_loc, '(A)') 'airfoil_u.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + file_loc = trim(step_dirpath)//trim(mpiiofs)//trim(file_loc) inquire (FILE=trim(file_loc), EXIST=file_exist) if (file_exist) then @@ -932,218 +946,6 @@ contains end subroutine s_read_parallel_data_files - !> The purpose of this subroutine is to populate the buffers - !! of the grid variables, which are constituted of the cell- - !! boundary locations and cell-width distributions, based on - !! the boundary conditions. - subroutine s_populate_grid_variables_buffers - - integer :: i !< Generic loop iterator - - ! Population of Buffers in x-direction ============================= - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%beg <= -3) then - do i = 1, buff_size - dx(-i) = dx(0) - end do - elseif (bc_x%beg == -2) then - do i = 1, buff_size - dx(-i) = dx(i - 1) - end do - elseif (bc_x%beg == -1) then - do i = 1, buff_size - dx(-i) = dx(m - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! 
the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cb(-1 - i) = x_cb(-i) - dx(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2d0 - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_x%end <= -3) then - do i = 1, buff_size - dx(m + i) = dx(m) - end do - elseif (bc_x%end == -2) then - do i = 1, buff_size - dx(m + i) = dx(m - (i - 1)) - end do - elseif (bc_x%end == -1) then - do i = 1, buff_size - dx(m + i) = dx(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(1, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2d0 - end do - - ! END: Population of Buffers in x-direction ======================== - - ! Population of Buffers in y-direction ============================= - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (n == 0) then - return - elseif (bc_y%beg <= -3 .and. bc_y%beg /= -14) then - do i = 1, buff_size - dy(-i) = dy(0) - end do - elseif (bc_y%beg == -2 .or. bc_y%beg == -14) then - do i = 1, buff_size - dy(-i) = dy(i - 1) - end do - elseif (bc_y%beg == -1) then - do i = 1, buff_size - dy(-i) = dy(n - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cb(-1 - i) = y_cb(-i) - dy(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2d0 - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_y%end <= -3) then - do i = 1, buff_size - dy(n + i) = dy(n) - end do - elseif (bc_y%end == -2) then - do i = 1, buff_size - dy(n + i) = dy(n - (i - 1)) - end do - elseif (bc_y%end == -1) then - do i = 1, buff_size - dy(n + i) = dy(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(2, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! 
coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2d0 - end do - - ! END: Population of Buffers in y-direction ======================== - - ! Population of Buffers in z-direction ============================= - - ! Populating cell-width distribution buffer, at the beginning of the - ! coordinate direction, based on the selected boundary condition. In - ! order, these are the ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (p == 0) then - return - elseif (bc_z%beg <= -3) then - do i = 1, buff_size - dz(-i) = dz(0) - end do - elseif (bc_z%beg == -2) then - do i = 1, buff_size - dz(-i) = dz(i - 1) - end do - elseif (bc_z%beg == -1) then - do i = 1, buff_size - dz(-i) = dz(p - (i - 1)) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, -1) - end if - - ! Computing the cell-boundary locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cb(-1 - i) = z_cb(-i) - dz(-i) - end do - ! Computing the cell-center locations buffer, at the beginning of - ! the coordinate direction, from the cell-width distribution buffer - do i = 1, buff_size - z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2d0 - end do - - ! Populating the cell-width distribution buffer, at the end of the - ! coordinate direction, based on desired boundary condition. These - ! include, in order, ghost-cell extrapolation, symmetry, periodic, - ! and processor boundary conditions. - if (bc_z%end <= -3) then - do i = 1, buff_size - dz(p + i) = dz(p) - end do - elseif (bc_z%end == -2) then - do i = 1, buff_size - dz(p + i) = dz(p - (i - 1)) - end do - elseif (bc_z%end == -1) then - do i = 1, buff_size - dz(p + i) = dz(i - 1) - end do - else - call s_mpi_sendrecv_grid_variables_buffers(3, 1) - end if - - ! Populating the cell-boundary locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i) - end do - ! Populating the cell-center locations buffer, at the end of the - ! coordinate direction, from buffer of the cell-width distribution - do i = 1, buff_size - z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2d0 - end do - - ! END: Population of Buffers in z-direction ======================== - - end subroutine s_populate_grid_variables_buffers - !> The purpose of this procedure is to initialize the !! values of the internal-energy equations of each phase !! from the mass of each phase, the mixture momentum and @@ -1398,6 +1200,7 @@ contains subroutine s_initialize_modules call s_initialize_global_parameters_module() + !Quadrature weights and nodes for polydisperse simulations if (bubbles .and. nb > 1 .and. R0_type == 1) then call s_simpson @@ -1417,6 +1220,7 @@ contains call acc_present_dump() #endif + call s_initialize_mpi_common_module() call s_initialize_mpi_proxy_module() call s_initialize_variables_conversion_module() if (grid_geometry == 3) call s_initialize_fftw_module() @@ -1458,6 +1262,8 @@ contains call acc_present_dump() #endif + call s_initialize_boundary_conditions_module() + ! Reading in the user provided initial condition and grid data call s_read_data_files(q_cons_ts(1)%vf) @@ -1467,7 +1273,7 @@ contains if (acoustic_source) call s_precalculate_acoustic_spatial_sources() ! 
Populating the buffers of the grid variables using the boundary conditions - call s_populate_grid_variables_buffers() + call s_mpi_sendrecv_grid_spacing_buffers(bc_id_sfs) ! Computation of parameters, allocation of memory, association of pointers, ! and/or execution of any other tasks that are needed to properly configure @@ -1573,11 +1379,10 @@ contains !$acc update device(sigma, surface_tension) !$acc update device(dx, dy, dz, x_cb, x_cc, y_cb, y_cc, z_cb, z_cc) - - !$acc update device(bc_x%vb1, bc_x%vb2, bc_x%vb3, bc_x%ve1, bc_x%ve2, bc_x%ve3) - !$acc update device(bc_y%vb1, bc_y%vb2, bc_y%vb3, bc_y%ve1, bc_y%ve2, bc_y%ve3) - !$acc update device(bc_z%vb1, bc_z%vb2, bc_z%vb3, bc_z%ve1, bc_z%ve2, bc_z%ve3) + !$acc update device(bc_x%vel_beg, bc_x%vel_end) + !$acc update device(bc_y%vel_beg, bc_y%vel_end) + !$acc update device(bc_z%vel_beg, bc_z%vel_end) !$acc update device(bc_x%grcbc_in, bc_x%grcbc_out, bc_x%grcbc_vel_out) !$acc update device(bc_y%grcbc_in, bc_y%grcbc_out, bc_y%grcbc_vel_out) !$acc update device(bc_z%grcbc_in, bc_z%grcbc_out, bc_z%grcbc_vel_out) @@ -1605,6 +1410,7 @@ contains call s_finalize_variables_conversion_module() if (grid_geometry == 3) call s_finalize_fftw_module call s_finalize_mpi_proxy_module() + call s_finalize_mpi_common_module() call s_finalize_global_parameters_module() if (relax) call s_finalize_relaxation_solver_module() if (viscous) then diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index afe909ca5..75d0b91d1 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -16,6 +16,8 @@ module m_viscous use m_helper use m_finite_differences + + use m_boundary_conditions_common ! ========================================================================== private; public s_get_viscous, & @@ -1303,7 +1305,8 @@ contains type(scalar_field), intent(inout) :: grad_z type(int_bounds_info) :: ix, iy, iz - integer :: j, k, l !< Generic loop iterators + @:BOUNDARY_CONDITION_INTEGER_DECLARATIONS() + integer :: iter_loc ix%beg = 1 - buff_size; ix%end = m + buff_size - 1 if (n > 0) then @@ -1397,66 +1400,31 @@ contains end if end if - if (bc_x%beg <= -3) then - !$acc parallel loop collapse(2) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(0, k, l) = (-3d0*var%sf(0, k, l) + 4d0*var%sf(1, k, l) - var%sf(2, k, l))/ & - (x_cc(2) - x_cc(0)) - end do - end do - end if - if (bc_x%end <= -3) then - !$acc parallel loop collapse(2) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do k = idwbuff(2)%beg, idwbuff(2)%end - grad_x%sf(m, k, l) = (3d0*var%sf(m, k, l) - 4d0*var%sf(m - 1, k, l) + var%sf(m - 2, k, l))/ & - (x_cc(m) - x_cc(m - 2)) - end do - end do - end if - if (n > 0) then - if (bc_y%beg <= -3 .and. bc_y%beg /= -13) then - !$acc parallel loop collapse(2) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, 0, l) = (-3d0*var%sf(j, 0, l) + 4d0*var%sf(j, 1, l) - var%sf(j, 2, l))/ & - (y_cc(2) - y_cc(0)) - end do - end do - end if - if (bc_y%end <= -3) then - !$acc parallel loop collapse(2) gang vector default(present) - do l = idwbuff(3)%beg, idwbuff(3)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_y%sf(j, n, l) = (3d0*var%sf(j, n, l) - 4d0*var%sf(j, n - 1, l) + var%sf(j, n - 2, l))/ & - (y_cc(n) - y_cc(n - 2)) - end do - end do + #:for dir, cmp in zip([1, 2, 3], ['x', 'y', 'z']) + if (${dir}$ <= num_dims .and. 
any(bc_id_has_bc(${dir}$, -1, :-3))) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc="-1", thickness=1, pack_v_size=1) + if (bc_id_sfs(${dir}$, -1)%sf(exlhs, eylhs, ezlhs)%type <= -13 .and. bc_id_sfs(${dir}$, -1)%sf(exlhs, eylhs, ezlhs)%type /= -13) then + grad_${cmp}$%sf(ex, ey, ez) = & + (iter_loc*3d0*var%sf(ex, ey, ez) - & + iter_loc*4d0*var%sf(ex - locx, ey - locy, ez - locz) + & + iter_loc*var%sf(ex - 2*locx, ey - 2*locy, ez - 2*locz))/ & + (${cmp}$_cc(2) - ${cmp}$_cc(0)) + end if + #:endblock end if - if (p > 0) then - if (bc_z%beg <= -3) then - !$acc parallel loop collapse(2) gang vector default(present) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, 0) = & - (-3d0*var%sf(j, k, 0) + 4d0*var%sf(j, k, 1) - var%sf(j, k, 2))/ & - (z_cc(2) - z_cc(0)) - end do - end do - end if - if (bc_z%end <= -3) then - !$acc parallel loop collapse(2) gang vector default(present) - do k = idwbuff(2)%beg, idwbuff(2)%end - do j = idwbuff(1)%beg, idwbuff(1)%end - grad_z%sf(j, k, p) = & - (3d0*var%sf(j, k, p) - 4d0*var%sf(j, k, p - 1) + var%sf(j, k, p - 2))/ & - (z_cc(p) - z_cc(p - 2)) - end do - end do - end if + + if (${dir}$ <= num_dims .and. any(bc_id_has_bc(${dir}$, +1, :-3))) then + #:block ITERATE_OVER_BUFFER_REGION_SIDED(dir=dir, loc="+1", thickness=1, pack_v_size=1) + if (bc_id_sfs(${dir}$, +1)%sf(exlhs, eylhs, ezlhs)%type <= -13 .and. bc_id_sfs(${dir}$, -1)%sf(exlhs, eylhs, ezlhs)%type /= -13) then + grad_${cmp}$%sf(ex, ey, ez) = & + (-iter_loc*3d0*var%sf(ex, ey, ez) + & + iter_loc*4d0*var%sf(ex - locx, ey - locy, ez - locz) - & + iter_loc*var%sf(ex - 2*locx, ey - 2*locy, ez - 2*locz))/ & + (${cmp}$_cc(e${cmp}$) - ${cmp}$_cc(e${cmp}$-2)) + end if + #:endblock end if - end if + #:endfor end subroutine s_compute_fd_gradient diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 38f659a8e..962cab30c 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -96,10 +96,6 @@ module m_weno ! 
END: WENO Coefficients =================================================== - integer :: v_size !< Number of WENO-reconstructed cell-average variables - - !$acc declare create(v_size) - !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ type(int_bounds_info) :: is1_weno, is2_weno, is3_weno diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index cad27120a..275fa295f 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -82,6 +82,8 @@ def add_common_arguments(p, mask = None): test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.") test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") + test.add_argument( "--dry-run", action="store_true", default=False, help="Build and generate case files but do not run tests.") + test_meg = test.add_mutually_exclusive_group() test_meg.add_argument("--generate", action="store_true", default=False, help="(Test Generation) Generate golden files.") test_meg.add_argument("--add-new-variables", action="store_true", default=False, help="(Test Generation) If new variables are found in D/ when running tests, add them to the golden files.") diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 5749bd172..c091edbf6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -55,8 +55,28 @@ def analytic(self): 'cfl_const_dt': ParamType.LOG, 'chemistry': ParamType.LOG, 'cantera_file': ParamType.STR, + 'num_bc_patches': ParamType.INT, } +for i in range(100): + COMMON[f"patch_bc({i+1})%type"] = ParamType.INT + COMMON[f"patch_bc({i+1})%geometry"] = ParamType.INT + COMMON[f"patch_bc({i+1})%dir"] = ParamType.INT + COMMON[f"patch_bc({i+1})%loc"] = ParamType.INT + COMMON[f"patch_bc({i+1})%radius"] = ParamType.REAL + + for j in range(1, 3+1): + COMMON[f"patch_bc({i+1})%centroid({j})"] = ParamType.REAL + COMMON[f"patch_bc({i+1})%length({j})"] = ParamType.REAL + COMMON[f"patch_bc({i+1})%vel({j})"] = ParamType.REAL + +for cmp in ["x", "y", "z"]: + for loc in ["beg", "end"]: + COMMON[f'bc_{cmp}%{loc}'] = ParamType.INT + + for dir in range(1, 3+1): + COMMON[f'bc_{cmp}%vel_{loc}({dir})'] = ParamType.REAL + PRE_PROCESS = COMMON.copy() PRE_PROCESS.update({ 'old_grid': ParamType.LOG, @@ -249,14 +269,6 @@ def analytic(self): SIMULATION[f'patch_ib({ib_id})%length_{cmp}'] = ParamType.REAL for cmp in ["x", "y", "z"]: - SIMULATION[f'bc_{cmp}%beg'] = ParamType.INT - SIMULATION[f'bc_{cmp}%end'] = ParamType.INT - SIMULATION[f'bc_{cmp}%vb1'] = ParamType.REAL - SIMULATION[f'bc_{cmp}%vb2'] = ParamType.REAL - SIMULATION[f'bc_{cmp}%vb3'] = ParamType.REAL - SIMULATION[f'bc_{cmp}%ve1'] = ParamType.REAL - SIMULATION[f'bc_{cmp}%ve2'] = ParamType.REAL - SIMULATION[f'bc_{cmp}%ve3'] = ParamType.REAL SIMULATION[f'bc_{cmp}%pres_in'] = ParamType.REAL SIMULATION[f'bc_{cmp}%pres_out'] = ParamType.REAL SIMULATION[f'bc_{cmp}%grcbc_in'] = ParamType.LOG @@ -275,7 +287,6 @@ def analytic(self): SIMULATION[f'{var}_{cmp}'] = ParamType.REAL SIMULATION[f'bf_{cmp}'] = ParamType.LOG - for prepend in ["domain%beg", "domain%end"]: SIMULATION[f"{cmp}_{prepend}"] = ParamType.REAL diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index beedb639d..0b84f8322 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -1,5 +1,5 @@ import os, 
typing, shutil, time, itertools -from random import sample +from random import sample, seed import rich, rich.table @@ -55,6 +55,7 @@ def __filter(cases_) -> typing.List[TestCase]: if ARG("percent") == 100: return cases + seed(time.time()) return sample(cases, k=int(len(cases)*ARG("percent")/100.0)) @@ -140,6 +141,10 @@ def _handle_case(case: TestCase, devices: typing.Set[int]): case.delete_output() case.create_directory() + if ARG("dry_run"): + cons.print(f" [bold magenta]{case.get_uuid()}[/bold magenta] SKIP {case.trace}") + return + cmd = case.run([PRE_PROCESS, SIMULATION], gpus=devices) out_filepath = os.path.join(case.get_dirpath(), "out_pre_sim.txt") @@ -224,7 +229,10 @@ def handle_case(case: TestCase, devices: typing.Set[int]): try: _handle_case(case, devices) - nPASS += 1 + if ARG("dry_run"): + nSKIP += 1 + else: + nPASS += 1 except Exception as exc: if nAttempts < ARG("max_attempts"): cons.print(f"[bold yellow] Attempt {nAttempts}: Failed test {case.get_uuid()}. Retrying...[/bold yellow]")
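
The `case_dicts.py` hunk above registers `num_bc_patches`, the `patch_bc(i)%...` family, and `bc_[x,y,z]%vel_beg/vel_end(i)` as accepted case parameters. The fragment below is a minimal sketch of how a case dictionary might exercise these keys; the key names mirror the registrations in `toolchain/mfc/run/case_dicts.py`, while all numeric values (BC type codes, geometry code, patch location convention, centroid/length/velocity) are illustrative placeholders and are not taken from this patch.

    # Hypothetical case.py excerpt using the newly registered keys.
    # Key names follow toolchain/mfc/run/case_dicts.py; all values are placeholders.
    case_params = {
        # Domain-edge defaults
        'bc_x%beg': -3,              # illustrative BC type code
        'bc_x%end': -3,
        'bc_x%vel_end(1)': 0.0,      # x-velocity applied at bc_x%end

        # A single boundary-condition patch layered over the defaults
        'num_bc_patches': 1,
        'patch_bc(1)%type': -2,      # illustrative BC type code
        'patch_bc(1)%dir': 1,        # x-direction
        'patch_bc(1)%loc': -1,       # beginning face (assumed convention)
        'patch_bc(1)%geometry': 1,   # illustrative geometry code
        'patch_bc(1)%centroid(2)': 0.5,
        'patch_bc(1)%length(2)': 0.25,
        'patch_bc(1)%vel(1)': 1.0,
    }

Cases written this way can be checked cheaply with the new toolchain flag added in `args.py`, e.g. `./mfc.sh test --dry-run -j 8`, which builds the targets and generates case files but does not run the tests.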