diff --git a/.clang-tidy b/.clang-tidy index 42f4fc36065..f40dd987add 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -33,10 +33,10 @@ Checks: '-*, modernize-replace-auto-ptr, modernize-replace-disallow-copy-and-assign-macro, modernize-replace-random-shuffle, - modernize-return-braced-init-list, modernize-shrink-to-fit, modernize-unary-static-assert, modernize-use-nullptr, + -modernize-return-braced-init-list, mpi-*, performance-faster-string-find, performance-for-range-copy, diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 03a3044848f..1754549ab2d 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -111,7 +111,7 @@ jobs: which nvcc || echo "nvcc not in PATH!" git clone https://github.com/AMReX-Codes/amrex.git ../amrex - cd ../amrex && git checkout --detach 2e99628138df3b5b0ecf50b0c1201d5547f821a0 && cd - + cd ../amrex && git checkout --detach 23.10 && cd - make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2 build_nvhpc21-11-nvcc: diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index 2fd979feff9..c81ee598114 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -45,7 +45,7 @@ jobs: brew install ccache brew install fftw brew install libomp - brew link --force libomp + brew link --overwrite --force libomp brew install ninja brew install open-mpi brew install pkg-config diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c32c94c99c..bb8b6c8b33d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ exclude: '^share/openPMD/thirdParty' # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace args: [--markdown-linebreak-ext=md] diff --git a/CMakeLists.txt b/CMakeLists.txt index f9b1a8bbc2b..5d00093e893 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ # Preamble #################################################################### # cmake_minimum_required(VERSION 3.20.0) -project(WarpX VERSION 23.09) +project(WarpX VERSION 23.10) include(${WarpX_SOURCE_DIR}/cmake/WarpXFunctions.cmake) diff --git a/Docs/source/conf.py b/Docs/source/conf.py index e2cdfe15cda..f9e06bcc8c3 100644 --- a/Docs/source/conf.py +++ b/Docs/source/conf.py @@ -79,9 +79,9 @@ # built documents. # # The short X.Y version. -version = u'23.09' +version = u'23.10' # The full version, including alpha/beta/rc tags. -release = u'23.09' +release = u'23.10' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/Docs/source/highlights.rst b/Docs/source/highlights.rst index dd9f6fd3e81..1e99629b9c2 100644 --- a/Docs/source/highlights.rst +++ b/Docs/source/highlights.rst @@ -14,6 +14,11 @@ Plasma-Based Acceleration Scientific works in laser-plasma and beam-plasma acceleration. +#. Peng, H. and Huang, T. W. and Jiang, K. and Li, R. and Wu, C. N. and Yu, M. Y. and Riconda, C. and Weber, S. and Zhou, C. T. and Ruan, S. C. + **Coherent Subcycle Optical Shock from a Superluminal Plasma Wake**. + Phys. Rev. Lett. **131**, 145003, 2023 + `DOI:10.1103/PhysRevLett.131.145003 `__ + #. Mewes SM, Boyle GJ, Ferran Pousa A, Shalloo RJ, Osterhoff J, Arran C, Corner L, Walczak R, Hooker SM, Thévenet M. **Demonstration of tunability of HOFI waveguides via start-to-end simulations**. Phys. Rev. Research **5**, 033112, 2023 @@ -27,8 +32,8 @@ Scientific works in laser-plasma and beam-plasma acceleration. #. Wang J, Zeng M, Li D, Wang X, Gao J. **High quality beam produced by tightly focused laser driven wakefield accelerators**. - arXiv pre-print, 2023. - `DOI:10.48550/arXiv.2304.10730 `__ + Phys. Rev. Accel. Beams, **26**, 091303, 2023. + `DOI:10.1103/PhysRevAccelBeams.26.091303 `__ #. Fedeli L, Huebl A, Boillod-Cerneux F, Clark T, Gott K, Hillairet C, Jaure S, Leblanc A, Lehe R, Myers A, Piechurski C, Sato M, Zaim N, Zhang W, Vay J-L, Vincenti H. **Pushing the Frontier in the Design of Laser-Based Electron Accelerators with Groundbreaking Mesh-Refined Particle-In-Cell Simulations on Exascale-Class Supercomputers**. diff --git a/Docs/source/install/hpc/lassen.rst b/Docs/source/install/hpc/lassen.rst index 5726254652a..ac15844a5ff 100644 --- a/Docs/source/install/hpc/lassen.rst +++ b/Docs/source/install/hpc/lassen.rst @@ -24,12 +24,27 @@ If you are new to this system, **please see the following resources**: Login ----- -.. note:: +.. tab-set:: - Lassen is currently transitioning to RHEL8. - During this transition, first SSH into lassen and then ``ssh eatoss4`` next to work with the updated RHEL8/TOSS4 nodes. + .. tab-item:: TOSS4 (RHEL8) - Approximately October 2023, the new software environment on these nodes will be the new default. + Lassen is currently transitioning to RHEL8. + During this transition, first SSH into lassen and then to the updated RHEL8/TOSS4 nodes. + + .. code-block:: bash + + ssh lassen.llnl.gov + ssh eatoss4 + + Approximately October/November 2023, the new software environment on these nodes will be the new default. + + .. tab-item:: TOSS3 (RHEL7) + + .. code-block:: bash + + ssh lassen.llnl.gov + + Approximately October/November 2023, this partition will become TOSS4 (RHEL8) as well. .. _building-lassen-preparation: @@ -43,78 +58,135 @@ Use the following commands to download the WarpX source code: git clone https://github.com/ECP-WarpX/WarpX.git /usr/workspace/${USER}/lassen/src/warpx -We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx.profile``. -Create it now: +.. tab-set:: -.. code-block:: bash + .. tab-item:: TOSS4 (RHEL8) - cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example $HOME/lassen_v100_warpx.profile + We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx.profile``. + Create it now: -.. dropdown:: Script Details - :color: light - :icon: info - :animate: fade-in-slide-down + .. code-block:: bash - .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example - :language: bash + cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example $HOME/lassen_v100_warpx.profile -Edit the 2nd line of this script, which sets the ``export proj=""`` variable. -For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx.profile``. -Enter the edit mode by typing ``i`` and edit line 2 to read: + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down -.. code-block:: bash + .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example + :language: bash - export proj="nsldt" + Edit the 2nd line of this script, which sets the ``export proj=""`` variable. + For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx.profile``. + Enter the edit mode by typing ``i`` and edit line 2 to read: -Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit). + .. code-block:: bash -.. important:: + export proj="nsldt" - Now, and as the first step on future logins to lassen, activate these environment settings: + Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit). - .. code-block:: bash + .. important:: + + Now, and as the first step on future logins to lassen, activate these environment settings: + + .. code-block:: bash + + source $HOME/lassen_v100_warpx.profile + + .. tab-item:: TOSS3 (RHEL7) + + We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx_toss3.profile``. + Create it now: + + .. code-block:: bash + + cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example $HOME/lassen_v100_warpx_toss3.profile + + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down - source $HOME/lassen_v100_warpx.profile + .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example + :language: bash + + Edit the 2nd line of this script, which sets the ``export proj=""`` variable. + For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx_toss3.profile``. + Enter the edit mode by typing ``i`` and edit line 2 to read: + + .. code-block:: bash + + export proj="nsldt" + + Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit). + + .. important:: + + Now, and as the first step on future logins to lassen, activate these environment settings: + + .. code-block:: bash + + source $HOME/lassen_v100_warpx_toss3.profile Finally, since lassen does not yet provide software modules for some of our dependencies, install them once: -.. code-block:: bash +.. tab-set:: - bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies.sh - source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen/bin/activate + .. tab-item:: TOSS4 (RHEL8) -.. dropdown:: Script Details - :color: light - :icon: info - :animate: fade-in-slide-down + .. code-block:: bash - .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies.sh - :language: bash + bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies.sh + source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen/bin/activate -.. dropdown:: AI/ML Dependencies (Optional) - :animate: fade-in-slide-down + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down - If you plan to run AI/ML workflows depending on pyTorch, run the next step as well. - This will take a while and should be skipped if not needed. + .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies.sh + :language: bash - .. code-block:: bash + .. dropdown:: AI/ML Dependencies (Optional) + :animate: fade-in-slide-down - runNode bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_ml.sh + If you plan to run AI/ML workflows depending on pyTorch, run the next step as well. + This will take a while and should be skipped if not needed. - .. dropdown:: Script Details - :color: light - :icon: info - :animate: fade-in-slide-down + .. code-block:: bash - .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_ml.sh - :language: bash + runNode bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_ml.sh - For `optimas dependencies `__ (incl. scikit-learn), plan another hour of build time: + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down - .. code-block:: bash + .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_ml.sh + :language: bash + + For `optimas dependencies `__ (incl. scikit-learn), plan another hour of build time: + + .. code-block:: bash + + python3 -m pip install -r /usr/workspace/${USER}/lassen/src/warpx/Tools/optimas/requirements.txt + + .. tab-item:: TOSS3 (RHEL7) + + .. code-block:: bash + + bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh + source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen-toss3/bin/activate - python3 -m pip install -r /usr/workspace/${USER}/lassen/src/warpx/Tools/optimas/requirements.txt + .. dropdown:: Script Details + :color: light + :icon: info + :animate: fade-in-slide-down + .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh + :language: bash .. _building-lassen-compilation: diff --git a/Python/pywarpx/fields.py b/Python/pywarpx/fields.py index fd647287a53..5eeeda50ea7 100644 --- a/Python/pywarpx/fields.py +++ b/Python/pywarpx/fields.py @@ -298,10 +298,14 @@ def _get_field(self, mfi): # self.mf.array(mfi) is in C ordering. # Note: transposing creates a view and not a copy. device_arr4 = self.mf.array(mfi) - if cp is not None: - device_arr = cp.array(device_arr4, copy=False).T + if libwarpx.libwarpx_so.Config.have_gpu: + if cp is not None: + device_arr = device_arr4.to_cupy(copy=False) + else: + # Relies on managed memory + device_arr = device_arr4.to_numpy(copy=False) else: - device_arr = np.array(device_arr4, copy=False).T + device_arr = device_arr4.to_numpy(copy=False) if not self.include_ghosts: nghosts = self._get_n_ghosts() device_arr = device_arr[tuple([slice(ng, -ng) for ng in nghosts[:self.dim]])] @@ -423,11 +427,10 @@ def __getitem__(self, index): if global_slices is not None: # Note that the array will always have 4 dimensions. device_arr = self._get_field(mfi) - if cp is not None: - # Copy the data from the device to the host - slice_arr = cp.asnumpy(device_arr[block_slices]) - else: - slice_arr = device_arr[block_slices] + slice_arr = device_arr[block_slices] + if (cp is not None) and (type(slice_arr) is cp.ndarray): + # Copy data from host to device using cupy syntax + slice_arr = slice_arr.get() datalist.append((global_slices, slice_arr)) # Gather the data from all processors @@ -528,9 +531,9 @@ def __setitem__(self, index, value): mf_arr = self._get_field(mfi) if isinstance(value, np.ndarray): slice_value = value3d[global_slices] - if cp is not None: + if libwarpx.libwarpx_so.Config.have_gpu: # Copy data from host to device - slice_value = cp.asarray(value3d[global_slices]) + slice_value = cp.asarray(slice_value) mf_arr[block_slices] = slice_value else: mf_arr[block_slices] = value diff --git a/Python/setup.py b/Python/setup.py index 99f79cd1d18..568c3f83ddc 100644 --- a/Python/setup.py +++ b/Python/setup.py @@ -54,7 +54,7 @@ package_data = {} setup(name = 'pywarpx', - version = '23.09', + version = '23.10', packages = ['pywarpx'], package_dir = {'pywarpx': 'pywarpx'}, description = """Wrapper of WarpX""", diff --git a/README.md b/README.md index ef0551405a0..87cf2b012c0 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Code Status development](https://dev.azure.com/ECP-WarpX/WarpX/_apis/build/status/ECP-WarpX.WarpX?branchName=development)](https://dev.azure.com/ECP-WarpX/WarpX/_build/latest?definitionId=1&branchName=development) [![Nightly Installation Tests](https://dev.azure.com/ECP-WarpX/WarpX/_apis/build/status/ECP-WarpX.Nightly?branchName=nightly&label=nightly%20packages)](https://dev.azure.com/ECP-WarpX/WarpX/_build?definitionId=2) -[![Documentation Status](https://readthedocs.org/projects/warpx/badge/?version=latest)](https://warpx.readthedocs.io/en/latest/?badge=latest) +[![Documentation Status](https://readthedocs.org/projects/warpx/badge/?version=latest)](https://warpx.readthedocs.io) [![Spack Version](https://img.shields.io/spack/v/warpx)](https://spack.readthedocs.io/en/latest/package_list.html#warpx) [![Conda Version](https://img.shields.io/conda/vn/conda-forge/warpx)](https://anaconda.org/conda-forge/warpx) [![Discussions](https://img.shields.io/badge/chat-discussions-turquoise.svg)](https://github.com/ECP-WarpX/WarpX/discussions) diff --git a/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json new file mode 100644 index 00000000000..72c11245e82 --- /dev/null +++ b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json @@ -0,0 +1,40 @@ +{ + "electrons": { + "particle_momentum_x": 4.2339125386796835e-20, + "particle_momentum_y": 0.0, + "particle_momentum_z": 4.2339186493637336e-20, + "particle_position_x": 0.6553603030161138, + "particle_position_y": 0.6553603026454923, + "particle_weight": 3200000000000000.5 + }, + "lev=0": { + "Bx": 0.0, + "By": 44.73189518299763, + "Bz": 0.0, + "Ex": 7583674122515.894, + "Ey": 0.0, + "Ez": 7583672042436.007, + "jx": 7283798122345162.0, + "jy": 0.0, + "jz": 7283804678733656.0 + }, + "lev=1": { + "Bx": 0.0, + "By": 467.8271651312273, + "Bz": 0.0, + "Ex": 7609732451490.526, + "Ey": 0.0, + "Ez": 7604891944279.171, + "jx": 7103859890231833.0, + "jy": 0.0, + "jz": 7099873215674540.0 + }, + "positrons": { + "particle_momentum_x": 4.233907500710132e-20, + "particle_momentum_y": 0.0, + "particle_momentum_z": 4.2339135644675305e-20, + "particle_position_x": 0.6553596934061604, + "particle_position_y": 0.6553596937869579, + "particle_weight": 3200000000000000.5 + } +} \ No newline at end of file diff --git a/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json b/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json index cbb43a81004..17d56dcc906 100644 --- a/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json +++ b/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json @@ -1,13 +1,13 @@ { "lev=0": { - "Bx": 30128649.582633037, - "By": 30128649.58263304, - "Bz": 30128649.582633033, - "Ex": 137776481658696.53, - "Ey": 137776481658695.72, - "Ez": 137776481658736.47, - "G": 7382627392167406.0, - "divB": 6914882882637.787, - "divE": 60880563.38401385 + "Bx": 30110529.73244452, + "By": 30110529.73244452, + "Bz": 30110529.732444517, + "Ex": 137103615680453.66, + "Ey": 137103615680454.5, + "Ez": 137103615680494.48, + "G": 7413121805692223.0, + "divB": 6944252335584.075, + "divE": 60882624.59445304 } } \ No newline at end of file diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini index c6caa6b915c..1a1c1fc612c 100644 --- a/Regression/WarpX-GPU-tests.ini +++ b/Regression/WarpX-GPU-tests.ini @@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more [AMReX] dir = /home/regtester/git/amrex/ -branch = 2e99628138df3b5b0ecf50b0c1201d5547f821a0 +branch = 23.10 [source] dir = /home/regtester/git/WarpX diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini index e4c502d858c..3eb8101addb 100644 --- a/Regression/WarpX-tests.ini +++ b/Regression/WarpX-tests.ini @@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det [AMReX] dir = /home/regtester/AMReX_RegTesting/amrex/ -branch = 2e99628138df3b5b0ecf50b0c1201d5547f821a0 +branch = 23.10 [source] dir = /home/regtester/AMReX_RegTesting/warpx @@ -1225,6 +1225,25 @@ particleTypes = electrons positrons analysisRoutine = Examples/Tests/langmuir/analysis_2d.py analysisOutputImage = Langmuir_multi_2d_MR.png +[Langmuir_multi_2d_MR_momentum_conserving] +buildDir = . +inputFile = Examples/Tests/langmuir/inputs_2d +runtime_params = algo.maxwell_solver=ckc warpx.use_filter=1 amr.max_level=1 amr.ref_ratio=4 warpx.fine_tag_lo=-10.e-6 -10.e-6 warpx.fine_tag_hi=10.e-6 10.e-6 algo.field_gathering=momentum-conserving diag1.electrons.variables=w ux uy uz diag1.positrons.variables=w ux uy uz +dim = 2 +addToCompileString = +cmakeSetupOpts = -DWarpX_DIMS=2 +restartTest = 0 +useMPI = 1 +numprocs = 2 +useOMP = 1 +numthreads = 1 +compileTest = 0 +doVis = 0 +compareParticles = 1 +particleTypes = electrons positrons +analysisRoutine = Examples/Tests/langmuir/analysis_2d.py +analysisOutputImage = Langmuir_multi_2d_MR_momentum_conserving.png + [Langmuir_multi_2d_MR_psatd] buildDir = . inputFile = Examples/Tests/langmuir/inputs_2d diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H index a925cd6c9ef..4be1be146cc 100644 --- a/Source/BoundaryConditions/PML.H +++ b/Source/BoundaryConditions/PML.H @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -187,15 +188,10 @@ public: void ExchangeG (amrex::MultiFab* G_fp, amrex::MultiFab* G_cp, int do_pml_in_domain); void ExchangeG (PatchType patch_type, amrex::MultiFab* Gp, int do_pml_in_domain); - void FillBoundary (); - void FillBoundaryE (); - void FillBoundaryB (); - void FillBoundaryF (); - void FillBoundaryG (); - void FillBoundaryE (PatchType patch_type); - void FillBoundaryB (PatchType patch_type); - void FillBoundaryF (PatchType patch_type); - void FillBoundaryG (PatchType patch_type); + void FillBoundaryE (PatchType patch_type, std::optional nodal_sync=std::nullopt); + void FillBoundaryB (PatchType patch_type, std::optional nodal_sync=std::nullopt); + void FillBoundaryF (PatchType patch_type, std::optional nodal_sync=std::nullopt); + void FillBoundaryG (PatchType patch_type, std::optional nodal_sync=std::nullopt); bool ok () const { return m_ok; } diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp index 5fc3e2dd3dc..811d1aa469d 100644 --- a/Source/BoundaryConditions/PML.cpp +++ b/Source/BoundaryConditions/PML.cpp @@ -1229,103 +1229,66 @@ PML::CopyToPML (MultiFab& pml, MultiFab& reg, const Geometry& geom) } void -PML::FillBoundary () -{ - FillBoundaryE(); - FillBoundaryB(); - FillBoundaryF(); - FillBoundaryG(); -} - -void -PML::FillBoundaryE () -{ - FillBoundaryE(PatchType::fine); - FillBoundaryE(PatchType::coarse); -} - -void -PML::FillBoundaryE (PatchType patch_type) +PML::FillBoundaryE (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_E_fp[0] && pml_E_fp[0]->nGrowVect().max() > 0) { const auto& period = m_geom->periodicity(); const Vector mf{pml_E_fp[0].get(),pml_E_fp[1].get(),pml_E_fp[2].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } else if (patch_type == PatchType::coarse && pml_E_cp[0] && pml_E_cp[0]->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); const Vector mf{pml_E_cp[0].get(),pml_E_cp[1].get(),pml_E_cp[2].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } } void -PML::FillBoundaryB () -{ - FillBoundaryB(PatchType::fine); - FillBoundaryB(PatchType::coarse); -} - -void -PML::FillBoundaryB (PatchType patch_type) +PML::FillBoundaryB (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_B_fp[0]) { const auto& period = m_geom->periodicity(); const Vector mf{pml_B_fp[0].get(),pml_B_fp[1].get(),pml_B_fp[2].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } else if (patch_type == PatchType::coarse && pml_B_cp[0]) { const auto& period = m_cgeom->periodicity(); const Vector mf{pml_B_cp[0].get(),pml_B_cp[1].get(),pml_B_cp[2].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } } void -PML::FillBoundaryF () -{ - FillBoundaryF(PatchType::fine); - FillBoundaryF(PatchType::coarse); -} - -void -PML::FillBoundaryF (PatchType patch_type) +PML::FillBoundaryF (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_F_fp && pml_F_fp->nGrowVect().max() > 0) { const auto& period = m_geom->periodicity(); - ablastr::utils::communication::FillBoundary(*pml_F_fp, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(*pml_F_fp, WarpX::do_single_precision_comms, period, nodal_sync); } else if (patch_type == PatchType::coarse && pml_F_cp && pml_F_cp->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); - ablastr::utils::communication::FillBoundary(*pml_F_cp, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(*pml_F_cp, WarpX::do_single_precision_comms, period, nodal_sync); } } void -PML::FillBoundaryG () -{ - FillBoundaryG(PatchType::fine); - FillBoundaryG(PatchType::coarse); -} - -void -PML::FillBoundaryG (PatchType patch_type) +PML::FillBoundaryG (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_G_fp && pml_G_fp->nGrowVect().max() > 0) { const auto& period = m_geom->periodicity(); - ablastr::utils::communication::FillBoundary(*pml_G_fp, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(*pml_G_fp, WarpX::do_single_precision_comms, period, nodal_sync); } else if (patch_type == PatchType::coarse && pml_G_cp && pml_G_cp->nGrowVect().max() > 0) { const auto& period = m_cgeom->periodicity(); - ablastr::utils::communication::FillBoundary(*pml_G_cp, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(*pml_G_cp, WarpX::do_single_precision_comms, period, nodal_sync); } } diff --git a/Source/BoundaryConditions/PML_RZ.H b/Source/BoundaryConditions/PML_RZ.H index 72c901bf480..ac4b6ff2c4d 100644 --- a/Source/BoundaryConditions/PML_RZ.H +++ b/Source/BoundaryConditions/PML_RZ.H @@ -22,6 +22,7 @@ #include #include +#include #include enum struct PatchType : int; @@ -45,8 +46,8 @@ public: void FillBoundaryE (); void FillBoundaryB (); - void FillBoundaryE (PatchType patch_type); - void FillBoundaryB (PatchType patch_type); + void FillBoundaryE (PatchType patch_type, std::optional nodal_sync=std::nullopt); + void FillBoundaryB (PatchType patch_type, std::optional nodal_sync=std::nullopt); void CheckPoint (const std::string& dir) const; void Restart (const std::string& dir); diff --git a/Source/BoundaryConditions/PML_RZ.cpp b/Source/BoundaryConditions/PML_RZ.cpp index 98ed12e7d67..d04c6e53bf8 100644 --- a/Source/BoundaryConditions/PML_RZ.cpp +++ b/Source/BoundaryConditions/PML_RZ.cpp @@ -134,13 +134,13 @@ PML_RZ::FillBoundaryE () } void -PML_RZ::FillBoundaryE (PatchType patch_type) +PML_RZ::FillBoundaryE (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_E_fp[0] && pml_E_fp[0]->nGrowVect().max() > 0) { const amrex::Periodicity& period = m_geom->periodicity(); const Vector mf{pml_E_fp[0].get(),pml_E_fp[1].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } } @@ -151,13 +151,13 @@ PML_RZ::FillBoundaryB () } void -PML_RZ::FillBoundaryB (PatchType patch_type) +PML_RZ::FillBoundaryB (PatchType patch_type, std::optional nodal_sync) { if (patch_type == PatchType::fine && pml_B_fp[0]) { const amrex::Periodicity& period = m_geom->periodicity(); const Vector mf{pml_B_fp[0].get(),pml_B_fp[1].get()}; - ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync); } } diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp index 467a4268d97..a1b536f6c71 100644 --- a/Source/Evolve/WarpXEvolve.cpp +++ b/Source/Evolve/WarpXEvolve.cpp @@ -472,10 +472,6 @@ WarpX::OneStep_nosub (Real cur_time) if (WarpX::do_divb_cleaning || WarpX::do_pml_divb_cleaning) FillBoundaryG(guard_cells.ng_alloc_G, WarpX::sync_nodal_points); } - - if (do_pml) { - NodalSyncPML(); - } } else { EvolveF(0.5_rt * dt[0], DtType::FirstHalf); EvolveG(0.5_rt * dt[0], DtType::FirstHalf); @@ -502,11 +498,10 @@ WarpX::OneStep_nosub (Real cur_time) if (do_pml) { DampPML(); - NodalSyncPML(); - FillBoundaryE(guard_cells.ng_MovingWindow); - FillBoundaryB(guard_cells.ng_MovingWindow); - FillBoundaryF(guard_cells.ng_MovingWindow); - FillBoundaryG(guard_cells.ng_MovingWindow); + FillBoundaryE(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points); + FillBoundaryB(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points); + FillBoundaryF(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points); + FillBoundaryG(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points); } // E and B are up-to-date in the domain, but all guard cells are @@ -746,11 +741,6 @@ WarpX::OneStep_multiJ (const amrex::Real cur_time) if (WarpX::do_divb_cleaning || WarpX::do_pml_divb_cleaning) FillBoundaryG(guard_cells.ng_alloc_G, WarpX::sync_nodal_points); - // Synchronize fields on nodal points in PML - if (do_pml) - { - NodalSyncPML(); - } #else amrex::ignore_unused(cur_time); WARPX_ABORT_WITH_MESSAGE( @@ -927,9 +917,6 @@ WarpX::OneStep_sub1 (Real curtime) if ( safe_guard_cells ) FillBoundaryB(coarse_lev, PatchType::fine, guard_cells.ng_FieldSolver, WarpX::sync_nodal_points); - - // Synchronize nodal points at the end of the time step - if (do_pml) NodalSyncPML(); } void diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp index 9b7f16478fc..e5ede95272c 100644 --- a/Source/Parallelization/WarpXComm.cpp +++ b/Source/Parallelization/WarpXComm.cpp @@ -181,6 +181,16 @@ WarpX::UpdateAuxilaryDataStagToNodal () ng_src, ng, WarpX::do_single_precision_comms, cperiod); } + const amrex::IntVect& refinement_ratio = refRatio(lev-1); + + const amrex::IntVect& Bx_fp_stag = Bfield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& By_fp_stag = Bfield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_fp_stag = Bfield_fp[lev][2]->ixType().toIntVect(); + + const amrex::IntVect& Bx_cp_stag = Bfield_cp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& By_cp_stag = Bfield_cp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Bz_cp_stag = Bfield_cp[lev][2]->ixType().toIntVect(); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -203,9 +213,9 @@ WarpX::UpdateAuxilaryDataStagToNodal () amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, bx_cp, bx_c); - warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, by_cp, by_c); - warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, bz_cp, bz_c); + warpx_interp(j, k, l, bx_aux, bx_fp, bx_cp, bx_c, Bx_fp_stag, Bx_cp_stag, refinement_ratio); + warpx_interp(j, k, l, by_aux, by_fp, by_cp, by_c, By_fp_stag, By_cp_stag, refinement_ratio); + warpx_interp(j, k, l, bz_aux, bz_fp, bz_cp, bz_c, Bz_fp_stag, Bz_cp_stag, refinement_ratio); }); } } @@ -239,6 +249,16 @@ WarpX::UpdateAuxilaryDataStagToNodal () ng_src, ng, WarpX::do_single_precision_comms, cperiod); } + const amrex::IntVect& refinement_ratio = refRatio(lev-1); + + const amrex::IntVect& Ex_fp_stag = Efield_fp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_fp_stag = Efield_fp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_fp_stag = Efield_fp[lev][2]->ixType().toIntVect(); + + const amrex::IntVect& Ex_cp_stag = Efield_cp[lev][0]->ixType().toIntVect(); + const amrex::IntVect& Ey_cp_stag = Efield_cp[lev][1]->ixType().toIntVect(); + const amrex::IntVect& Ez_cp_stag = Efield_cp[lev][2]->ixType().toIntVect(); + #ifdef AMREX_USE_OMP #pragma omp parallel if (Gpu::notInLaunchRegion()) #endif @@ -261,9 +281,9 @@ WarpX::UpdateAuxilaryDataStagToNodal () amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept { - warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, ex_cp, ex_c); - warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, ey_cp, ey_c); - warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, ez_cp, ez_c); + warpx_interp(j, k, l, ex_aux, ex_fp, ex_cp, ex_c, Ex_fp_stag, Ex_cp_stag, refinement_ratio); + warpx_interp(j, k, l, ey_aux, ey_fp, ey_cp, ey_c, Ey_fp_stag, Ey_cp_stag, refinement_ratio); + warpx_interp(j, k, l, ez_aux, ez_fp, ez_cp, ez_c, Ez_fp_stag, Ez_cp_stag, refinement_ratio); }); } } @@ -561,13 +581,13 @@ WarpX::FillBoundaryE (const int lev, const PatchType patch_type, const amrex::In (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); pml[lev]->Exchange(mf_pml, mf, patch_type, do_pml_in_domain); - pml[lev]->FillBoundaryE(patch_type); + pml[lev]->FillBoundaryE(patch_type, nodal_sync); } #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD) if (pml_rz[lev]) { - pml_rz[lev]->FillBoundaryE(patch_type); + pml_rz[lev]->FillBoundaryE(patch_type, nodal_sync); } #endif } @@ -618,13 +638,13 @@ WarpX::FillBoundaryB (const int lev, const PatchType patch_type, const amrex::In (patch_type == PatchType::fine) ? pml[lev]->GetB_fp() : pml[lev]->GetB_cp(); pml[lev]->Exchange(mf_pml, mf, patch_type, do_pml_in_domain); - pml[lev]->FillBoundaryB(patch_type); + pml[lev]->FillBoundaryB(patch_type, nodal_sync); } #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD) if (pml_rz[lev]) { - pml_rz[lev]->FillBoundaryB(patch_type); + pml_rz[lev]->FillBoundaryB(patch_type, nodal_sync); } #endif } @@ -761,7 +781,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng, std::optionalok()) { if (F_fp[lev]) pml[lev]->ExchangeF(patch_type, F_fp[lev].get(), do_pml_in_domain); - pml[lev]->FillBoundaryF(patch_type); + pml[lev]->FillBoundaryF(patch_type, nodal_sync); } if (F_fp[lev]) @@ -776,7 +796,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng, std::optionalok()) { if (F_cp[lev]) pml[lev]->ExchangeF(patch_type, F_cp[lev].get(), do_pml_in_domain); - pml[lev]->FillBoundaryF(patch_type); + pml[lev]->FillBoundaryF(patch_type, nodal_sync); } if (F_cp[lev]) @@ -805,7 +825,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng, std::optio if (do_pml && pml[lev] && pml[lev]->ok()) { if (G_fp[lev]) pml[lev]->ExchangeG(patch_type, G_fp[lev].get(), do_pml_in_domain); - pml[lev]->FillBoundaryG(patch_type); + pml[lev]->FillBoundaryG(patch_type, nodal_sync); } if (G_fp[lev]) @@ -820,7 +840,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng, std::optio if (do_pml && pml[lev] && pml[lev]->ok()) { if (G_cp[lev]) pml[lev]->ExchangeG(patch_type, G_cp[lev].get(), do_pml_in_domain); - pml[lev]->FillBoundaryG(patch_type); + pml[lev]->FillBoundaryG(patch_type, nodal_sync); } if (G_cp[lev]) @@ -1473,62 +1493,3 @@ void WarpX::NodalSyncRho ( ablastr::utils::communication::OverrideSync(rhoc, WarpX::do_single_precision_comms, cperiod); } } - -void WarpX::NodalSyncPML () -{ - for (int lev = 0; lev <= finest_level; lev++) { - NodalSyncPML(lev); - } -} - -void WarpX::NodalSyncPML (int lev) -{ - NodalSyncPML(lev, PatchType::fine); - if (lev > 0) NodalSyncPML(lev, PatchType::coarse); -} - -void WarpX::NodalSyncPML (int lev, PatchType patch_type) -{ - if (pml[lev] && pml[lev]->ok()) - { - const std::array& pml_E = (patch_type == PatchType::fine) ? - pml[lev]->GetE_fp() : pml[lev]->GetE_cp(); - const std::array& pml_B = (patch_type == PatchType::fine) ? - pml[lev]->GetB_fp() : pml[lev]->GetB_cp(); - amrex::MultiFab* const pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp(); - amrex::MultiFab* const pml_G = (patch_type == PatchType::fine) ? pml[lev]->GetG_fp() : pml[lev]->GetG_cp(); - - // Always synchronize nodal points - const amrex::Periodicity& period = Geom(lev).periodicity(); - ablastr::utils::communication::OverrideSync(*pml_E[0], WarpX::do_single_precision_comms, period); - ablastr::utils::communication::OverrideSync(*pml_E[1], WarpX::do_single_precision_comms, period); - ablastr::utils::communication::OverrideSync(*pml_E[2], WarpX::do_single_precision_comms, period); - ablastr::utils::communication::OverrideSync(*pml_B[0], WarpX::do_single_precision_comms, period); - ablastr::utils::communication::OverrideSync(*pml_B[1], WarpX::do_single_precision_comms, period); - ablastr::utils::communication::OverrideSync(*pml_B[2], WarpX::do_single_precision_comms, period); - if (pml_F) { - ablastr::utils::communication::OverrideSync(*pml_F, WarpX::do_single_precision_comms, period); - } - if (pml_G) { - ablastr::utils::communication::OverrideSync(*pml_G, WarpX::do_single_precision_comms, period); - } - } - -#if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD) - if (pml_rz[lev]) - { - // This is not actually needed with RZ PSATD since the - // arrays are always cell centered. Keep for now since - // it may be useful if the PML is used with FDTD - const std::array pml_rz_E = pml_rz[lev]->GetE_fp(); - const std::array pml_rz_B = pml_rz[lev]->GetB_fp(); - - // Always synchronize nodal points - const amrex::Periodicity& period = Geom(lev).periodicity(); - pml_rz_E[0]->OverrideSync(period); - pml_rz_E[1]->OverrideSync(period); - pml_rz_B[0]->OverrideSync(period); - pml_rz_B[1]->OverrideSync(period); - } -#endif -} diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H index b97773af251..4cec33bc84f 100644 --- a/Source/Parallelization/WarpXComm_K.H +++ b/Source/Parallelization/WarpXComm_K.H @@ -10,6 +10,20 @@ #include #include +/** + * \brief Interpolation function called within WarpX::UpdateAuxilaryDataSameType + * to interpolate data from the coarse and fine grids to the fine aux grid, + * assuming that all grids have the same staggering (either collocated or staggered). + * + * \param[in] j index along x of the output array + * \param[in] k index along y (in 3D) or z (in 2D) of the output array + * \param[in] l index along z (in 3D, l=0 in 2D) of the output array + * \param[in,out] arr_aux output array where interpolated values are stored + * \param[in] arr_fine input fine-patch array storing the values to interpolate + * \param[in] arr_coarse input coarse-patch array storing the values to interpolate + * \param[in] arr_stag IndexType of the arrays + * \param[in] rr mesh refinement ratios along each direction + */ AMREX_GPU_DEVICE AMREX_FORCE_INLINE void warpx_interp (int j, int k, int l, amrex::Array4 const& arr_aux, @@ -68,561 +82,218 @@ void warpx_interp (int j, int k, int l, arr_aux(j,k,l) = arr_fine(j,k,l) + res; } +/** + * \brief Interpolation function called within WarpX::UpdateAuxilaryDataStagToNodal + * to interpolate data from the coarse and fine grids to the fine aux grid, + * with momentum-conserving field gathering, hence between grids with different staggering, + * and assuming that the aux grid is collocated. + * + * \param[in] j index along x of the output array + * \param[in] k index along y (in 3D) or z (in 2D) of the output array + * \param[in] l index along z (in 3D, l=0 in 2D) of the output array + * \param[in,out] arr_aux output array where interpolated values are stored + * \param[in] arr_fine input fine-patch array storing the values to interpolate + * \param[in] arr_coarse input coarse-patch array storing the values to interpolate + * \param[in] arr_fine_stag IndexType of the fine-patch arrays + * \param[in] arr_coarse_stag IndexType of the coarse-patch arrays + * \param[in] rr mesh refinement ratios along each direction + */ AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_x (int j, int k, int l, - amrex::Array4 const& Bxa, - amrex::Array4 const& Bxf, - amrex::Array4 const& Bxc, - amrex::Array4 const& Bxg) +void warpx_interp (int j, int k, int l, + amrex::Array4 const& arr_aux, + amrex::Array4 const& arr_fine, + amrex::Array4 const& arr_coarse, + amrex::Array4 const& arr_tmp, + const amrex::IntVect& arr_fine_stag, + const amrex::IntVect& arr_coarse_stag, + const amrex::IntVect& rr) { using namespace amrex; - // Pad Bxf with zeros beyond ghost cells for out-of-bound accesses - const auto Bxf_zeropad = [Bxf] (const int jj, const int kk, const int ll) noexcept + // Pad input arrays with zeros beyond ghost cells + // for out-of-bound accesses due to large-stencil operations + const auto arr_fine_zeropad = [arr_fine] (const int jj, const int kk, const int ll) noexcept { - return Bxf.contains(jj,kk,ll) ? Bxf(jj,kk,ll) : 0.0_rt; + return arr_fine.contains(jj,kk,ll) ? arr_fine(jj,kk,ll) : 0.0_rt; + }; + const auto arr_coarse_zeropad = [arr_coarse] (const int jj, const int kk, const int ll) noexcept + { + return arr_coarse.contains(jj,kk,ll) ? arr_coarse(jj,kk,ll) : 0.0_rt; + }; + const auto arr_tmp_zeropad = [arr_tmp] (const int jj, const int kk, const int ll) noexcept + { + return arr_tmp.contains(jj,kk,ll) ? arr_tmp(jj,kk,ll) : 0.0_rt; }; - const int jg = amrex::coarsen(j,2); - const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - const Real owx = 1.0_rt-wx; - - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy = 0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; + // NOTE Indices (j,k,l) in the following refer to: + // - (z,-,-) in 1D + // - (x,z,-) in 2D + // - (r,z,-) in RZ + // - (x,y,z) in 3D + // Refinement ratio + const int rj = rr[0]; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real bg = owx * Bxg(jg ,0,0) - + wx * Bxg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real bc = owx * Bxc(jg ,0,0) - + wx * Bxc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real bf = 0.5_rt*(Bxf_zeropad(j-1,0,0) + Bxf_zeropad(j,0,0)); - amrex::ignore_unused(owy); - + constexpr int rk = 1; + constexpr int rl = 1; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * Bxg(jg ,kg ,0) - + owx * wy * Bxg(jg ,kg+1,0) - + wx * owy * Bxg(jg+1,kg ,0) - + wx * wy * Bxg(jg+1,kg+1,0); - - // interp from coarse staggered to fine nodal - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - const Real bc = owx * owy * Bxc(jg ,kg ,0) - + owx * wy * Bxc(jg ,kg-1,0) - + wx * owy * Bxc(jg+1,kg ,0) - + wx * wy * Bxc(jg+1,kg-1,0); - - // interp from fine staggered to fine nodal - const Real bf = 0.5_rt*(Bxf_zeropad(j,k-1,0) + Bxf_zeropad(j,k,0)); - + const int rk = rr[1]; + constexpr int rl = 1; #else - - const int lg = amrex::coarsen(l,2); - Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt; - Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * owz * Bxg(jg ,kg ,lg ) - + wx * owy * owz * Bxg(jg+1,kg ,lg ) - + owx * wy * owz * Bxg(jg ,kg+1,lg ) - + wx * wy * owz * Bxg(jg+1,kg+1,lg ) - + owx * owy * wz * Bxg(jg ,kg ,lg+1) - + wx * owy * wz * Bxg(jg+1,kg ,lg+1) - + owx * wy * wz * Bxg(jg ,kg+1,lg+1) - + wx * wy * wz * Bxg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - wz = 0.5_rt-wz; owz = 1.0_rt-wz; - const Real bc = owx * owy * owz * Bxc(jg ,kg ,lg ) - + wx * owy * owz * Bxc(jg+1,kg ,lg ) - + owx * wy * owz * Bxc(jg ,kg-1,lg ) - + wx * wy * owz * Bxc(jg+1,kg-1,lg ) - + owx * owy * wz * Bxc(jg ,kg ,lg-1) - + wx * owy * wz * Bxc(jg+1,kg ,lg-1) - + owx * wy * wz * Bxc(jg ,kg-1,lg-1) - + wx * wy * wz * Bxc(jg+1,kg-1,lg-1); - - // interp from fine stagged to fine nodal - const Real bf = 0.25_rt*(Bxf_zeropad(j,k-1,l-1) + Bxf_zeropad(j,k,l-1) + Bxf_zeropad(j,k-1,l) + Bxf_zeropad(j,k,l)); + const int rk = rr[1]; + const int rl = rr[2]; #endif - Bxa(j,k,l) = bg + (bf-bc); -} - -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_y (int j, int k, int l, - amrex::Array4 const& Bya, - amrex::Array4 const& Byf, - amrex::Array4 const& Byc, - amrex::Array4 const& Byg) -{ - using namespace amrex; - - // Pad Byf with zeros beyond ghost cells for out-of-bound accesses - const auto Byf_zeropad = [Byf] (const int jj, const int kk, const int ll) noexcept - { - return Byf.contains(jj,kk,ll) ? Byf(jj,kk,ll) : 0.0_rt; - }; - - const int jg = amrex::coarsen(j,2); - Real wx, owx; - wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - owx = 1.0_rt-wx; - - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy = 0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; + // Staggering of fine array (0: cell-centered; 1: nodal) + const int sj_fp = arr_fine_stag[0]; +#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) + const int sk_fp = arr_fine_stag[1]; +#elif defined(WARPX_DIM_3D) + const int sk_fp = arr_fine_stag[1]; + const int sl_fp = arr_fine_stag[2]; +#endif + // Staggering of coarse array (0: cell-centered; 1: nodal) + const int sj_cp = arr_coarse_stag[0]; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real bg = owx * Byg(jg ,0,0) - + wx * Byg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real bc = owx * Byc(jg ,0,0) - + wx * Byc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real bf = 0.5_rt*(Byf_zeropad(j-1,0,0) + Byf_zeropad(j,0,0)); - amrex::ignore_unused(owy); - + constexpr int sk_cp = 0; + constexpr int sl_cp = 0; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * Byg(jg ,kg ,0) - + owx * wy * Byg(jg ,kg+1,0) - + wx * owy * Byg(jg+1,kg ,0) - + wx * wy * Byg(jg+1,kg+1,0); - - // interp from coarse stagged (cell-centered for By) to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - const Real bc = owx * owy * Byc(jg ,kg ,0) - + owx * wy * Byc(jg ,kg-1,0) - + wx * owy * Byc(jg-1,kg ,0) - + wx * wy * Byc(jg-1,kg-1,0); - - // interp form fine stagger (cell-centered for By) to fine nodal - const Real bf = 0.25_rt*(Byf_zeropad(j,k,0) + Byf_zeropad(j-1,k,0) + Byf_zeropad(j,k-1,0) + Byf_zeropad(j-1,k-1,0)); - + const int sk_cp = arr_coarse_stag[1]; + constexpr int sl_cp = 0; #else - - const int lg = amrex::coarsen(l,2); - Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt; - Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * owz * Byg(jg ,kg ,lg ) - + wx * owy * owz * Byg(jg+1,kg ,lg ) - + owx * wy * owz * Byg(jg ,kg+1,lg ) - + wx * wy * owz * Byg(jg+1,kg+1,lg ) - + owx * owy * wz * Byg(jg ,kg ,lg+1) - + wx * owy * wz * Byg(jg+1,kg ,lg+1) - + owx * wy * wz * Byg(jg ,kg+1,lg+1) - + wx * wy * wz * Byg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - wz = 0.5_rt-wz; owz = 1.0_rt-wz; - const Real bc = owx * owy * owz * Byc(jg ,kg ,lg ) - + wx * owy * owz * Byc(jg-1,kg ,lg ) - + owx * wy * owz * Byc(jg ,kg+1,lg ) - + wx * wy * owz * Byc(jg-1,kg+1,lg ) - + owx * owy * wz * Byc(jg ,kg ,lg-1) - + wx * owy * wz * Byc(jg-1,kg ,lg-1) - + owx * wy * wz * Byc(jg ,kg+1,lg-1) - + wx * wy * wz * Byc(jg-1,kg+1,lg-1); - - // interp from fine stagged to fine nodal - const Real bf = 0.25_rt*(Byf_zeropad(j-1,k,l-1) + Byf_zeropad(j,k,l-1) + Byf_zeropad(j-1,k,l) + Byf_zeropad(j,k,l)); - + const int sk_cp = arr_coarse_stag[1]; + const int sl_cp = arr_coarse_stag[2]; #endif - Bya(j,k,l) = bg + (bf-bc); -} + // Number of points used for interpolation from coarse grid to fine grid + int nj; + int nk; + int nl; -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_bfield_z (int j, int k, int l, - amrex::Array4 const& Bza, - amrex::Array4 const& Bzf, - amrex::Array4 const& Bzc, - amrex::Array4 const& Bzg) -{ - using namespace amrex; + int jc = amrex::coarsen(j, rj); + int kc = amrex::coarsen(k, rk); + int lc = amrex::coarsen(l, rl); - // Pad Bzf with zeros beyond ghost cells for out-of-bound accesses - const auto Bzf_zeropad = [Bzf] (const int jj, const int kk, const int ll) noexcept - { - return Bzf.contains(jj,kk,ll) ? Bzf(jj,kk,ll) : 0.0_rt; - }; + amrex::Real tmp = 0.0_rt; + amrex::Real fine = 0.0_rt; + amrex::Real coarse = 0.0_rt; - const int jg = amrex::coarsen(j,2); - Real wx, owx; - wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - owx = 1.0_rt-wx; + amrex::Real wj; + amrex::Real wk; + amrex::Real wl; - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy = 0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; + // 1) Interpolation from coarse nodal to fine nodal + nj = 2; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real bg = owx * Bzg(jg ,0,0) - + wx * Bzg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real bc = owx * Bzc(jg ,0,0) - + wx * Bzc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real bf = Bzf_zeropad(j,0,0); - amrex::ignore_unused(owy); - + nk = 1; + nl = 1; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * Bzg(jg ,kg ,0) - + owx * wy * Bzg(jg ,kg+1,0) - + wx * owy * Bzg(jg+1,kg ,0) - + wx * wy * Bzg(jg+1,kg+1,0); - - // interp from coarse staggered to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - const Real bc = owx * owy * Bzc(jg ,kg ,0) - + owx * wy * Bzc(jg ,kg+1,0) - + wx * owy * Bzc(jg-1,kg ,0) - + wx * wy * Bzc(jg-1,kg+1,0); - - // interp from fine staggered to fine nodal - const Real bf = 0.5_rt*(Bzf_zeropad(j-1,k,0) + Bzf_zeropad(j,k,0)); - + nk = 2; + nl = 1; #else - - const int lg = amrex::coarsen(l,2); - const Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt; - const Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real bg = owx * owy * owz * Bzg(jg ,kg ,lg ) - + wx * owy * owz * Bzg(jg+1,kg ,lg ) - + owx * wy * owz * Bzg(jg ,kg+1,lg ) - + wx * wy * owz * Bzg(jg+1,kg+1,lg ) - + owx * owy * wz * Bzg(jg ,kg ,lg+1) - + wx * owy * wz * Bzg(jg+1,kg ,lg+1) - + owx * wy * wz * Bzg(jg ,kg+1,lg+1) - + wx * wy * wz * Bzg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - const Real bc = owx * owy * owz * Bzc(jg ,kg ,lg ) - + wx * owy * owz * Bzc(jg-1,kg ,lg ) - + owx * wy * owz * Bzc(jg ,kg-1,lg ) - + wx * wy * owz * Bzc(jg-1,kg-1,lg ) - + owx * owy * wz * Bzc(jg ,kg ,lg+1) - + wx * owy * wz * Bzc(jg-1,kg ,lg+1) - + owx * wy * wz * Bzc(jg ,kg-1,lg+1) - + wx * wy * wz * Bzc(jg-1,kg-1,lg+1); - - // interp from fine stagged to fine nodal - const Real bf = 0.25_rt*(Bzf_zeropad(j-1,k-1,l) + Bzf_zeropad(j,k-1,l) + Bzf_zeropad(j-1,k,l) + Bzf_zeropad(j,k,l)); - + nk = 2; + nl = 2; #endif - Bza(j,k,l) = bg + (bf-bc); -} - -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_x (int j, int k, int l, - amrex::Array4 const& Exa, - amrex::Array4 const& Exf, - amrex::Array4 const& Exc, - amrex::Array4 const& Exg) -{ - using namespace amrex; - - // Pad Exf with zeros beyond ghost cells for out-of-bound accesses - const auto Exf_zeropad = [Exf] (const int jj, const int kk, const int ll) noexcept - { - return Exf.contains(jj,kk,ll) ? Exf(jj,kk,ll) : 0.0_rt; - }; - - const int jg = amrex::coarsen(j,2); - Real wx, owx; - wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - owx = 1.0_rt-wx; + wj = 1.0_rt; + wk = 1.0_rt; + wl = 1.0_rt; + for (int jj = 0; jj < nj; jj++) { + for (int kk = 0; kk < nk; kk++) { + for (int ll = 0; ll < nl; ll++) { + wj = (rj - amrex::Math::abs(j - (jc + jj) * rj)) / static_cast(rj); +#if (AMREX_SPACEDIM >= 2) + wk = (rk - amrex::Math::abs(k - (kc + kk) * rk)) / static_cast(rk); +#endif +#if (AMREX_SPACEDIM == 3) + wl = (rl - amrex::Math::abs(l - (lc + ll) * rl)) / static_cast(rl); +#endif + tmp += wj * wk * wl * arr_tmp_zeropad(jc+jj,kc+kk,lc+ll); + } + } + } - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy =0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; + // 2) Interpolation from coarse staggered to fine nodal + nj = 2; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real eg = owx * Exg(jg ,0,0) - + wx * Exg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real ec = owx * Exc(jg ,0,0) - + wx * Exc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real ef = Exf_zeropad(j,0,0); - amrex::ignore_unused(owy); - + nk = 1; + nl = 1; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * Exg(jg ,kg ,0) - + owx * wy * Exg(jg ,kg+1,0) - + wx * owy * Exg(jg+1,kg ,0) - + wx * wy * Exg(jg+1,kg+1,0); - - // interp from coarse staggered to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - const Real ec = owx * owy * Exc(jg ,kg ,0) - + owx * wy * Exc(jg ,kg+1,0) - + wx * owy * Exc(jg-1,kg ,0) - + wx * wy * Exc(jg-1,kg+1,0); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Exf_zeropad(j-1,k,0) + Exf_zeropad(j,k,0)); - + nk = 2; + nl = 1; #else - - const int lg = amrex::coarsen(l,2); - const Real wz = (l == lg*2) ? 0.0 : 0.5; - const Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * owz * Exg(jg ,kg ,lg ) - + wx * owy * owz * Exg(jg+1,kg ,lg ) - + owx * wy * owz * Exg(jg ,kg+1,lg ) - + wx * wy * owz * Exg(jg+1,kg+1,lg ) - + owx * owy * wz * Exg(jg ,kg ,lg+1) - + wx * owy * wz * Exg(jg+1,kg ,lg+1) - + owx * wy * wz * Exg(jg ,kg+1,lg+1) - + wx * wy * wz * Exg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wx = 0.5_rt-wx; owx = 1.0_rt-wx; - const Real ec = owx * owy * owz * Exc(jg ,kg ,lg ) - + wx * owy * owz * Exc(jg-1,kg ,lg ) - + owx * wy * owz * Exc(jg ,kg+1,lg ) - + wx * wy * owz * Exc(jg-1,kg+1,lg ) - + owx * owy * wz * Exc(jg ,kg ,lg+1) - + wx * owy * wz * Exc(jg-1,kg ,lg+1) - + owx * wy * wz * Exc(jg ,kg+1,lg+1) - + wx * wy * wz * Exc(jg-1,kg+1,lg+1); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Exf_zeropad(j-1,k,l) + Exf_zeropad(j,k,l)); - + nk = 2; + nl = 2; #endif - Exa(j,k,l) = eg + (ef-ec); -} - -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_y (int j, int k, int l, - amrex::Array4 const& Eya, - amrex::Array4 const& Eyf, - amrex::Array4 const& Eyc, - amrex::Array4 const& Eyg) -{ - using namespace amrex; + const int jn = (sj_cp == 1) ? j : j - rj / 2; + const int kn = (sk_cp == 1) ? k : k - rk / 2; + const int ln = (sl_cp == 1) ? l : l - rl / 2; - // Pad Eyf with zeros beyond ghost cells for out-of-bound accesses - const auto Eyf_zeropad = [Eyf] (const int jj, const int kk, const int ll) noexcept - { - return Eyf.contains(jj,kk,ll) ? Eyf(jj,kk,ll) : 0.0_rt; - }; + jc = amrex::coarsen(jn, rj); + kc = amrex::coarsen(kn, rk); + lc = amrex::coarsen(ln, rl); - const int jg = amrex::coarsen(j,2); - const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - const Real owx = 1.0_rt-wx; + wj = 1.0_rt; + wk = 1.0_rt; + wl = 1.0_rt; + for (int jj = 0; jj < nj; jj++) { + for (int kk = 0; kk < nk; kk++) { + for (int ll = 0; ll < nl; ll++) { + wj = (rj - amrex::Math::abs(jn - (jc + jj) * rj)) / static_cast(rj); +#if (AMREX_SPACEDIM >= 2) + wk = (rk - amrex::Math::abs(kn - (kc + kk) * rk)) / static_cast(rk); +#endif +#if (AMREX_SPACEDIM == 3) + wl = (rl - amrex::Math::abs(ln - (lc + ll) * rl)) / static_cast(rl); +#endif + coarse += wj * wk * wl * arr_coarse_zeropad(jc+jj,kc+kk,lc+ll); + } + } + } - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy = 0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; + // 3) Interpolation from fine staggered to fine nodal + nj = (sj_fp == 0) ? 2 : 1; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real eg = owx * Eyg(jg ,0,0) - + wx * Eyg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real ec = owx * Eyc(jg ,0,0) - + wx * Eyc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real ef = Eyf_zeropad(j,0,0); - amrex::ignore_unused(owy); - + nk = 1; + nl = 1; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * Eyg(jg ,kg ,0) - + owx * wy * Eyg(jg ,kg+1,0) - + wx * owy * Eyg(jg+1,kg ,0) - + wx * wy * Eyg(jg+1,kg+1,0); - - // interp from coarse staggered to fine nodal (Eyc is actually nodal) - const Real ec = owx * owy * Eyc(jg ,kg ,0) - + owx * wy * Eyc(jg ,kg+1,0) - + wx * owy * Eyc(jg+1,kg ,0) - + wx * wy * Eyc(jg+1,kg+1,0); - - // interp from fine staggered to fine nodal - const Real ef = Eyf_zeropad(j,k,0); - + nk = (sk_fp == 0) ? 2 : 1; + nl = 1; #else - - const int lg = amrex::coarsen(l,2); - const Real wz = (l == lg*2) ? 0.0 : 0.5; - const Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * owz * Eyg(jg ,kg ,lg ) - + wx * owy * owz * Eyg(jg+1,kg ,lg ) - + owx * wy * owz * Eyg(jg ,kg+1,lg ) - + wx * wy * owz * Eyg(jg+1,kg+1,lg ) - + owx * owy * wz * Eyg(jg ,kg ,lg+1) - + wx * owy * wz * Eyg(jg+1,kg ,lg+1) - + owx * wy * wz * Eyg(jg ,kg+1,lg+1) - + wx * wy * wz * Eyg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - const Real ec = owx * owy * owz * Eyc(jg ,kg ,lg ) - + wx * owy * owz * Eyc(jg+1,kg ,lg ) - + owx * wy * owz * Eyc(jg ,kg-1,lg ) - + wx * wy * owz * Eyc(jg+1,kg-1,lg ) - + owx * owy * wz * Eyc(jg ,kg ,lg+1) - + wx * owy * wz * Eyc(jg+1,kg ,lg+1) - + owx * wy * wz * Eyc(jg ,kg-1,lg+1) - + wx * wy * wz * Eyc(jg+1,kg-1,lg+1); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Eyf_zeropad(j,k-1,l) + Eyf_zeropad(j,k,l)); - + nk = (sk_fp == 0) ? 2 : 1; + nl = (sl_fp == 0) ? 2 : 1; #endif - Eya(j,k,l) = eg + (ef-ec); -} - -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -void warpx_interp_nd_efield_z (int j, int k, int l, - amrex::Array4 const& Eza, - amrex::Array4 const& Ezf, - amrex::Array4 const& Ezc, - amrex::Array4 const& Ezg) -{ - using namespace amrex; - - // Pad Ezf with zeros beyond ghost cells for out-of-bound accesses - const auto Ezf_zeropad = [Ezf] (const int jj, const int kk, const int ll) noexcept - { - return Ezf.contains(jj,kk,ll) ? Ezf(jj,kk,ll) : 0.0_rt; - }; - - const int jg = amrex::coarsen(j,2); - const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt; - const Real owx = 1.0_rt-wx; - - const int kg = amrex::coarsen(k,2); - Real wy = 0.0_rt; - Real owy = 0.0_rt; - wy = (k == kg*2) ? 0.0_rt : 0.5_rt; - owy = 1.0_rt-wy; - + const int jm = (sj_fp == 0) ? j-1 : j; #if defined(WARPX_DIM_1D_Z) - - // interp from coarse nodal to fine nodal - const Real eg = owx * Ezg(jg ,0,0) - + wx * Ezg(jg+1,0,0); - - // interp from coarse staggered to fine nodal - const Real ec = owx * Ezc(jg ,0,0) - + wx * Ezc(jg+1,0,0); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Ezf_zeropad(j-1,0,0) + Ezf_zeropad(j,0,0)); - amrex::ignore_unused(owy); - + const int km = k; + const int lm = l; #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * Ezg(jg ,kg ,0) - + owx * wy * Ezg(jg ,kg+1,0) - + wx * owy * Ezg(jg+1,kg ,0) - + wx * wy * Ezg(jg+1,kg+1,0); - - // interp from coarse stagged to fine nodal - wy = 0.5_rt-wy; owy = 1.0_rt-wy; - const Real ec = owx * owy * Ezc(jg ,kg ,0) - + owx * wy * Ezc(jg ,kg-1,0) - + wx * owy * Ezc(jg+1,kg ,0) - + wx * wy * Ezc(jg+1,kg-1,0); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Ezf_zeropad(j,k-1,0) + Ezf_zeropad(j,k,0)); - + const int km = (sk_fp == 0) ? k-1 : k; + const int lm = l; #else - - const int lg = amrex::coarsen(l,2); - Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt; - Real owz = 1.0_rt-wz; - - // interp from coarse nodal to fine nodal - const Real eg = owx * owy * owz * Ezg(jg ,kg ,lg ) - + wx * owy * owz * Ezg(jg+1,kg ,lg ) - + owx * wy * owz * Ezg(jg ,kg+1,lg ) - + wx * wy * owz * Ezg(jg+1,kg+1,lg ) - + owx * owy * wz * Ezg(jg ,kg ,lg+1) - + wx * owy * wz * Ezg(jg+1,kg ,lg+1) - + owx * wy * wz * Ezg(jg ,kg+1,lg+1) - + wx * wy * wz * Ezg(jg+1,kg+1,lg+1); - - // interp from coarse staggered to fine nodal - wz = 0.5_rt-wz; owz = 1.0_rt-wz; - const Real ec = owx * owy * owz * Ezc(jg ,kg ,lg ) - + wx * owy * owz * Ezc(jg+1,kg ,lg ) - + owx * wy * owz * Ezc(jg ,kg+1,lg ) - + wx * wy * owz * Ezc(jg+1,kg+1,lg ) - + owx * owy * wz * Ezc(jg ,kg ,lg-1) - + wx * owy * wz * Ezc(jg+1,kg ,lg-1) - + owx * wy * wz * Ezc(jg ,kg+1,lg-1) - + wx * wy * wz * Ezc(jg+1,kg+1,lg-1); - - // interp from fine staggered to fine nodal - const Real ef = 0.5_rt*(Ezf_zeropad(j,k,l-1) + Ezf_zeropad(j,k,l)); - + const int km = (sk_fp == 0) ? k-1 : k; + const int lm = (sl_fp == 0) ? l-1 : l; #endif - Eza(j,k,l) = eg + (ef-ec); + for (int jj = 0; jj < nj; jj++) { + for (int kk = 0; kk < nk; kk++) { + for (int ll = 0; ll < nl; ll++) { + wj = 1.0_rt / static_cast(nj); + wk = 1.0_rt / static_cast(nk); + wl = 1.0_rt / static_cast(nl); + fine += wj * wk * wl * arr_fine_zeropad(jm+jj,km+kk,lm+ll); + } + } + } + + // Final result + arr_aux(j,k,l) = tmp + (fine - coarse); } /** diff --git a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H index 5473f0093f0..9cf2f91b5aa 100644 --- a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H +++ b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H @@ -97,6 +97,10 @@ public: const amrex::ParmParse pp_collision_name(collision_name); pp_collision_name.queryarr("product_species", m_product_species); m_have_product_species = !m_product_species.empty(); + if ((std::is_same::value) & (m_have_product_species)) { + WARPX_ABORT_WITH_MESSAGE( "Binary collision " + collision_name + + " does not produce species. Thus, `product_species` should not be specified in the input script." ); + } m_copy_transform_functor = CopyTransformFunctorType(collision_name, mypc); } diff --git a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H index 2b43f76e89f..7fdf04de6ca 100644 --- a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H +++ b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H @@ -115,7 +115,7 @@ public: { using namespace amrex::literals; - if (n_total_pairs == 0) return {m_num_product_species, 0}; + if (n_total_pairs == 0) return amrex::Vector(m_num_product_species, 0); // Compute offset array and allocate memory for the produced species amrex::Gpu::DeviceVector offsets(n_total_pairs); diff --git a/Source/Utils/SpeciesUtils.cpp b/Source/Utils/SpeciesUtils.cpp index 2ac5879f9d1..bdba97f10b2 100644 --- a/Source/Utils/SpeciesUtils.cpp +++ b/Source/Utils/SpeciesUtils.cpp @@ -42,14 +42,6 @@ namespace SpeciesUtils { const bool mass_is_specified = utils::parser::queryWithParser(pp_species_name, "mass", mass); - if ( charge_is_specified && species_is_specified ){ - ablastr::warn_manager::WMRecordWarning("Species", - "Both '" + species_name + ".charge' and " + - species_name + ".species_type' are specified.\n" + - species_name + ".charge' will take precedence.\n"); - - } - WARPX_ALWAYS_ASSERT_WITH_MESSAGE( charge_is_specified || species_is_specified || @@ -58,6 +50,22 @@ namespace SpeciesUtils { species_name + "'." ); + WARPX_ALWAYS_ASSERT_WITH_MESSAGE( + mass_is_specified || + species_is_specified || + (injection_style == "external_file"), + "Need to specify at least one of species_type or mass for species '" + + species_name + "'." + ); + + if ( charge_is_specified && species_is_specified ){ + ablastr::warn_manager::WMRecordWarning("Species", + "Both '" + species_name + ".charge' and " + + species_name + ".species_type' are specified.\n" + + species_name + ".charge' will take precedence.\n"); + + } + if ( mass_is_specified && species_is_specified ){ ablastr::warn_manager::WMRecordWarning("Species", "Both '" + species_name + ".mass' and " + diff --git a/Source/WarpX.H b/Source/WarpX.H index 034c321ba14..1bd3f5bf46c 100644 --- a/Source/WarpX.H +++ b/Source/WarpX.H @@ -726,21 +726,6 @@ public: void CopyJPML (); bool isAnyBoundaryPML(); - /** - * \brief Synchronize the nodal points of the PML MultiFabs - */ - void NodalSyncPML (); - - /** - * \brief Synchronize the nodal points of the PML MultiFabs for given MR level - */ - void NodalSyncPML (int lev); - - /** - * \brief Synchronize the nodal points of the PML MultiFabs for given MR level and patch - */ - void NodalSyncPML (int lev, PatchType patch_type); - PML* GetPML (int lev); #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD) PML_RZ* GetPML_RZ (int lev); diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp index 348b3125f57..6be8d517775 100644 --- a/Source/WarpX.cpp +++ b/Source/WarpX.cpp @@ -1198,6 +1198,18 @@ WarpX::ReadParameters () // Use same shape factors in all directions, for gathering if (field_gathering_algo == GatheringAlgo::MomentumConserving) galerkin_interpolation = false; + // With the PSATD solver, momentum-conserving field gathering + // combined with mesh refinement does not seem to work correctly + // TODO Needs debugging + if (electromagnetic_solver_id == ElectromagneticSolverAlgo::PSATD && + field_gathering_algo == GatheringAlgo::MomentumConserving && + maxLevel() > 0) + { + WARPX_ABORT_WITH_MESSAGE( + "With the PSATD solver, momentum-conserving field gathering" + " combined with mesh refinement is currently not implemented"); + } + em_solver_medium = GetAlgorithmInteger(pp_algo, "em_solver_medium"); if (em_solver_medium == MediumForEM::Macroscopic ) { macroscopic_solver_algo = GetAlgorithmInteger(pp_algo,"macroscopic_sigma_method"); diff --git a/Source/ablastr/utils/Communication.H b/Source/ablastr/utils/Communication.H index 9be9fec1e58..85f58ff9f65 100644 --- a/Source/ablastr/utils/Communication.H +++ b/Source/ablastr/utils/Communication.H @@ -60,7 +60,8 @@ void ParallelAdd (amrex::MultiFab &dst, void FillBoundary (amrex::MultiFab &mf, bool do_single_precision_comms, - const amrex::Periodicity &period = amrex::Periodicity::NonPeriodic()); + const amrex::Periodicity &period = amrex::Periodicity::NonPeriodic(), + std::optional nodal_sync=std::nullopt); void FillBoundary (amrex::MultiFab &mf, amrex::IntVect ng, @@ -77,7 +78,7 @@ void FillBoundary (amrex::iMultiFab& mf, void FillBoundary(amrex::Vector const &mf, bool do_single_precision_comms, - const amrex::Periodicity &period); + const amrex::Periodicity &period, std::optional nodal_sync=std::nullopt); void SumBoundary (amrex::MultiFab &mf, diff --git a/Source/ablastr/utils/Communication.cpp b/Source/ablastr/utils/Communication.cpp index 3ae18ad8d1c..c33e6a1d154 100644 --- a/Source/ablastr/utils/Communication.cpp +++ b/Source/ablastr/utils/Communication.cpp @@ -114,18 +114,18 @@ void FillBoundary (amrex::MultiFab &mf, } } -void FillBoundary (amrex::MultiFab &mf, bool do_single_precision_comms, const amrex::Periodicity &period) +void FillBoundary (amrex::MultiFab &mf, bool do_single_precision_comms, const amrex::Periodicity &period, std::optional nodal_sync) { amrex::IntVect const ng = mf.n_grow; - FillBoundary(mf, ng, do_single_precision_comms, period); + FillBoundary(mf, ng, do_single_precision_comms, period, nodal_sync); } void FillBoundary (amrex::Vector const &mf, bool do_single_precision_comms, - const amrex::Periodicity &period) + const amrex::Periodicity &period, std::optional nodal_sync) { for (auto x : mf) { - ablastr::utils::communication::FillBoundary(*x, do_single_precision_comms, period); + ablastr::utils::communication::FillBoundary(*x, do_single_precision_comms, period, nodal_sync); } } diff --git a/Tools/Algorithms/psatd_pml.ipynb b/Tools/Algorithms/psatd_pml.ipynb new file mode 100644 index 00000000000..216be77fd87 --- /dev/null +++ b/Tools/Algorithms/psatd_pml.ipynb @@ -0,0 +1,607 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import inspect\n", + "import sympy as sp\n", + "from sympy import *\n", + "from sympy.solvers.solveset import linsolve\n", + "\n", + "sp.init_session()\n", + "sp.init_printing()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Global variables:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Dimensional parameters\n", + "dd = 9\n", + "DD = 12\n", + "\n", + "# Speed of light, time, time step\n", + "c = sp.symbols(r'c', real = True, positive = True)\n", + "t = sp.symbols(r't', real = True)\n", + "tn = sp.symbols(r't_n', real = True)\n", + "dt = sp.symbols(r'\\Delta{t}', real = True, positive = True)\n", + "\n", + "# Components of k vector, omega, norm of k vector\n", + "kx = sp.symbols(r'k_x', real = True)\n", + "ky = sp.symbols(r'k_y', real = True)\n", + "kz = sp.symbols(r'k_z', real = True)\n", + "om = sp.symbols(r'omega', real = True, positive = True)\n", + "knorm = sp.sqrt(kx**2 + ky**2 + kz**2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method:\n", + "The goal is to solve a system of second-order ordinary differential equations in time of the form $\\boldsymbol{\\ddot{X}} = M \\boldsymbol{X}$, where the $d \\times d$ matrix $M$ has eigenpairs $(\\lambda_i, \\{\\boldsymbol{v}_{i,1}, \\dots, \\boldsymbol{v}_{i,\\mu_i}\\})$, where $\\mu_i$ denotes the algebraic multiplicity of $\\lambda_i$ and $\\sum_i \\mu_i = d$.\n", + "Assuming that all eigenvalues are either zero or negative, we can write $\\lambda_i = - \\omega_i^2$, with $\\omega_i = \\sqrt{- \\lambda_i} \\geq 0$.\n", + "Then the solution of $\\boldsymbol{\\ddot{X}} = M \\boldsymbol{X}$ reads\n", + "$$\n", + "\\boldsymbol{X} = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t) + b_{i,j} S(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} \\,,\n", + "$$\n", + "where $a_{i,j}$ and $b_{i,j}$ are integration constants to be determined by the initial conditions $\\boldsymbol{X}(t_n)$ and $\\boldsymbol{\\dot{X}}(t_n)$, $C(\\omega, t) = \\cos(\\omega \\, (t - t_n))$, and\n", + "$$\n", + "S(\\omega, t) =\n", + "\\begin{cases}\n", + "(t - t_n) & \\omega = 0 \\,, \\\\\n", + "\\dfrac{\\sin(\\omega \\, (t - t_n))}{\\omega} & \\omega \\neq 0 \\,.\n", + "\\end{cases}\n", + "$$\n", + "We remark that\n", + "$$\n", + "\\begin{aligned}\n", + "& \\boldsymbol{X}(t_n) = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t_n) + b_{i,j} S(\\omega_i, t_n) \\right) \\boldsymbol{v}_{i,j} = \\sum_i \\sum_j^{\\mu_i} a_{i,j} \\boldsymbol{v}_{i,j} \\,, \\\\\n", + "& \\boldsymbol{\\dot{X}}(t_n) = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} \\dot{C}(\\omega_i, t_n) + b_{i,j} \\dot{S}(\\omega_i, t_n) \\right) \\boldsymbol{v}_{i,j} = \\sum_i \\sum_j^{\\mu_i} b_{i,j} \\boldsymbol{v}_{i,j} \\,.\n", + "\\end{aligned}\n", + "$$\n", + "In fact, the second time derivative of $\\boldsymbol{X}$ yields\n", + "$$\n", + "\\begin{split}\n", + "\\boldsymbol{\\ddot{X}} & = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} \\ddot{C}(\\omega_i, t) + b_{i,j} \\ddot{S}(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} \\\\\n", + "& = \\sum_i (-\\omega_i^2) \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t) + b_{i,j} S(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} = M \\boldsymbol{X} \\,,\n", + "\\end{split}\n", + "$$\n", + "where we used the fact that, by definition, $M \\boldsymbol{v}_{i,j} = \\lambda_i \\boldsymbol{v}_{i,j} = - \\omega_i^2 \\boldsymbol{v}_{i,j}$ for all $j = 1, \\dots, \\mu_i$." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Auxiliary functions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def C(omega, t):\n", + " return sp.cos(omega * (t-tn))\n", + "\n", + "def S(omega, t):\n", + " return (t-tn) if omega == 0. else sp.sin(omega * (t-tn)) / omega\n", + "\n", + "def Xt(eigenpairs, a, b, t):\n", + " \"\"\"\n", + " Compute X(t) according to the formulas above\n", + " for a given set of eigenpairs and coefficients.\n", + " \"\"\"\n", + " XX = zeros(DD, 1)\n", + " # Index used for the integration constants a_n and b_n\n", + " i = 0\n", + " # Loop over matrix eigenpairs\n", + " for ep in eigenpairs:\n", + " # ep[0] is an eigenvalue and om = sp.sqrt(-ep[0])\n", + " omega = 0. if ep[0] == 0. else om\n", + " # am is the algebraic multiplicity of the eigenvalue\n", + " am = ep[1]\n", + " # vF is the list of all eigenvectors corresponding to the eigenvalue\n", + " vX = ep[2]\n", + " # Loop over algebraic multiplicity of the eigenvalue\n", + " for j in range(am):\n", + " XX += (a[i] * C(omega, t) + b[i] * S(omega, t)) * vX[j]\n", + " i += 1\n", + " return XX\n", + "\n", + "def evolve(X, dX_dt, d2X_dt2):\n", + " \"\"\"\n", + " Solve ordinary differential equation X'' = M*X.\n", + " \"\"\"\n", + " # Set matrix for given ODE\n", + " MX = zeros(DD)\n", + " for i in range(DD):\n", + " for j in range(DD):\n", + " MX[i,j] = d2X_dt2[i].coeff(X[j], 1)\n", + " #MX /= c**2\n", + "\n", + " print()\n", + " print(r'Matrix:')\n", + " display(MX)\n", + "\n", + " # Characteristic matrix\n", + " lamda = sp.symbols(r'lamda')\n", + " Id = eye(DD)\n", + " MX_charmat = MX - lamda * Id\n", + "\n", + " # Characteristic polynomial\n", + " MX_charpoly = MX_charmat.det()\n", + " MX_charpoly = factor(MX_charpoly.as_expr())\n", + "\n", + " print(r'Characteristic polynomial:')\n", + " display(MX_charpoly)\n", + " \n", + " MX_eigenvals = sp.solve(MX_charpoly, lamda)\n", + "\n", + " # List of eigenvectors\n", + " MX_eigenvects = []\n", + "\n", + " # List of eigenpairs\n", + " MX_eigenpairs = []\n", + " \n", + " # Compute eigenvectors as null spaces\n", + " for l in MX_eigenvals:\n", + "\n", + " # M - lamda * Id\n", + " A = MX_charmat.subs(lamda, l)\n", + " A.simplify()\n", + "\n", + " print(r'Eigenvalue:')\n", + " display(l)\n", + "\n", + " print(r'Characteristic matrix:')\n", + " display(A)\n", + "\n", + " # Perform Gaussian elimination (necessary for lamda != 0)\n", + " if (l != 0.):\n", + " print(r'Gaussian elimination:')\n", + " print(r'A[0,:] += A[1,:]')\n", + " A[0,:] += A[1,:]\n", + " print(r'A[0,:] += A[2,:]')\n", + " A[0,:] += A[2,:]\n", + " print(r'Swap A[0,:] and A[11,:]')\n", + " row = A[11,:]\n", + " A[11,:] = A[0,:]\n", + " A[0,:] = row\n", + " print(r'A[3,:] += A[4,:]')\n", + " A[3,:] += A[4,:]\n", + " print(r'A[3,:] += A[5,:]')\n", + " A[3,:] += A[5,:]\n", + " print(r'Swap A[3,:] and A[10,:]')\n", + " row = A[10,:]\n", + " A[10,:] = A[3,:]\n", + " A[3,:] = row\n", + " print(r'A[0,:] += A[3,:]')\n", + " A[0,:] += A[3,:]\n", + " print(r'A[0,:] += A[9,:]')\n", + " A[0,:] += A[9,:]\n", + " print(r'Swap A[0,:] and A[9,:]')\n", + " row = A[9,:]\n", + " A[9,:] = A[0,:]\n", + " A[0,:] = row\n", + " print(r'A[6,:] += A[8,:]')\n", + " A[6,:] += A[8,:]\n", + " print(r'A[6,:] += A[7,:]')\n", + " A[6,:] += A[7,:]\n", + " print(r'Swap A[6,:] and A[8,:]')\n", + " row = A[8,:]\n", + " A[8,:] = A[6,:]\n", + " A[6,:] = row\n", + " print(r'A[6,:] += A[7,:]')\n", + " A[6,:] += A[7,:]\n", + " print(r'A[4,:] += A[5,:]')\n", + " A[4,:] += A[5,:]\n", + " A.simplify()\n", + " display(A)\n", + "\n", + " # Compute null space and store eigenvectors\n", + " v = A.nullspace()\n", + " MX_eigenvects.append(v)\n", + "\n", + " print(r'Eigenvectors:')\n", + " display(v)\n", + " \n", + " # Store eigenpairs (eigenvalue, algebraic multiplicity, eigenvectors)\n", + " MX_eigenpairs.append((l, len(v), v))\n", + " \n", + " #print(r'Eigenpairs:')\n", + " #display(MX_eigenpairs)\n", + "\n", + " # Verify that the eigenpairs satisfy the charcteristic equations\n", + " for ep in MX_eigenpairs:\n", + " for j in range(ep[1]):\n", + " diff = MX * ep[2][j] - ep[0] * ep[2][j]\n", + " diff.simplify()\n", + " if diff != zeros(DD,1):\n", + " print('The charcteristic equation is not verified for some eigenpairs')\n", + " display(diff)\n", + "\n", + " # Define integration constants\n", + " a = []\n", + " b = []\n", + " for i in range(DD):\n", + " an = r'a_{:d}'.format(i+1)\n", + " bn = r'b_{:d}'.format(i+1) \n", + " a.append(sp.symbols(an))\n", + " b.append(sp.symbols(bn))\n", + "\n", + " # Set equations corresponding to initial conditions\n", + " lhs_a = Xt(MX_eigenpairs, a, b, tn) - X\n", + " lhs_b = Xt(MX_eigenpairs, a, b, t ).diff(t).subs(t, tn) - dX_dt\n", + "\n", + " # Compute integration constants from initial conditions\n", + " # (convert list of tuples to list using list comprehension)\n", + " a = list(linsolve(list(lhs_a), a))\n", + " a = [item for el in a for item in el]\n", + " b = list(linsolve(list(lhs_b), b))\n", + " b = [item for el in b for item in el]\n", + "\n", + " # Evaluate solution at t = tn + dt\n", + " X_new = Xt(MX_eigenpairs, a, b, tn+dt).expand()\n", + " for d in range(DD):\n", + " for Eij in E:\n", + " X_new[d] = X_new[d].collect(Eij)\n", + " for Bij in B:\n", + " X_new[d] = X_new[d].collect(Bij)\n", + " for Fi in F:\n", + " X_new[d] = X_new[d].collect(Fi)\n", + " for Gi in G:\n", + " X_new[d] = X_new[d].collect(Gi)\n", + "\n", + " # Check correctness by taking *second* derivative\n", + " # and comparing with initial right-hand side at time tn\n", + " X_t = Xt(MX_eigenpairs, a, b, t)\n", + " diff = X_t.diff(t).diff(t).subs(t, tn).subs(om, c*knorm).expand() - d2X_dt2\n", + " diff.simplify()\n", + " if diff != zeros(DD,1):\n", + " print('Integration in time failed')\n", + " display(diff)\n", + " \n", + " return X_t, X_new" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### First-order and second-order ODEs for $\\boldsymbol{E}$, $\\boldsymbol{B}$, $F$ and $G$:\n", + "Equations for the $\\boldsymbol{E}$ field:\n", + "$$\n", + "\\begin{alignat*}{3}\n", + "& \\frac{\\partial E_{xx}}{\\partial t} = c^2 i k_x (F_x + F_y + F_z) \\quad\n", + "&& \\frac{\\partial E_{xy}}{\\partial t} = c^2 i k_y (B_{zx} + B_{zy} + B_{zz}) \\quad\n", + "&& \\frac{\\partial E_{xz}}{\\partial t} = -c^2 i k_z (B_{yx} + B_{yy} + B_{yz}) \\\\[5pt]\n", + "& \\frac{\\partial E_{yx}}{\\partial t} = -c^2 i k_x (B_{zx} + B_{zy} + B_{zz}) \\quad\n", + "&& \\frac{\\partial E_{yy}}{\\partial t} = c^2 i k_y (F_x + F_y + F_z) \\quad\n", + "&& \\frac{\\partial E_{yz}}{\\partial t} = c^2 i k_z (B_{xx} + B_{xy} + B_{xz}) \\\\[5pt]\n", + "& \\frac{\\partial E_{zx}}{\\partial t} = c^2 i k_x (B_{yx} + B_{yy} + B_{yz}) \\quad\n", + "&& \\frac{\\partial E_{zy}}{\\partial t} = -c^2 i k_y (B_{xx} + B_{xy} + B_{xz})\\quad\n", + "&& \\frac{\\partial E_{zz}}{\\partial t} = c^2 i k_z (F_x + F_y + F_z)\n", + "\\end{alignat*}\n", + "$$\n", + "\n", + "Equations for the $\\boldsymbol{B}$ field:\n", + "$$\n", + "\\begin{alignat*}{3}\n", + "& \\frac{\\partial B_{xx}}{\\partial t} = i k_x (G_x + G_y + G_z) \\quad\n", + "&& \\frac{\\partial B_{xy}}{\\partial t} = -i k_y (E_{zx} + E_{zy} + E_{zz}) \\quad\n", + "&& \\frac{\\partial B_{xz}}{\\partial t} = i k_z (E_{yx} + E_{yy} + E_{yz}) \\\\[5pt]\n", + "& \\frac{\\partial B_{yx}}{\\partial t} = i k_x (E_{zx} + E_{zy} + E_{zz}) \\quad\n", + "&& \\frac{\\partial B_{yy}}{\\partial t} = i k_y (G_x + G_y + G_z) \\quad\n", + "&& \\frac{\\partial B_{yz}}{\\partial t} = -i k_z (E_{xx} + E_{xy} + E_{xz}) \\\\[5pt]\n", + "& \\frac{\\partial B_{zx}}{\\partial t} = -i k_x (E_{yx} + E_{yy} + E_{yz}) \\quad\n", + "&& \\frac{\\partial B_{zy}}{\\partial t} = i k_y (E_{xx} + E_{xy} + E_{xz}) \\quad\n", + "&& \\frac{\\partial B_{zz}}{\\partial t} = i k_z (G_x + G_y + G_z)\n", + "\\end{alignat*}\n", + "$$\n", + "\n", + "Equations for the $F$ field:\n", + "$$\n", + "\\frac{\\partial F_x}{\\partial t} = i k_x (E_{xx} + E_{xy} + E_{xz}) \\quad\n", + "\\frac{\\partial F_y}{\\partial t} = i k_y (E_{yx} + E_{yy} + E_{yz}) \\quad\n", + "\\frac{\\partial F_z}{\\partial t} = i k_z (E_{zx} + E_{zy} + E_{zz})\n", + "$$\n", + "\n", + "Equations for the $G$ field:\n", + "$$\n", + "\\frac{\\partial G_x}{\\partial t} = c^2 i k_x (B_{xx} + B_{xy} + B_{xz}) \\quad\n", + "\\frac{\\partial G_y}{\\partial t} = c^2 i k_y (B_{yx} + B_{yy} + B_{yz}) \\quad\n", + "\\frac{\\partial G_z}{\\partial t} = c^2 i k_z (B_{zx} + B_{zy} + B_{zz})\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# indices 0 1 2 3 4 5 6 7 8\n", + "labels = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']\n", + "\n", + "# E fields\n", + "Exx = sp.symbols(r'E_{xx}')\n", + "Exy = sp.symbols(r'E_{xy}')\n", + "Exz = sp.symbols(r'E_{xz}')\n", + "Eyx = sp.symbols(r'E_{yx}')\n", + "Eyy = sp.symbols(r'E_{yy}')\n", + "Eyz = sp.symbols(r'E_{yz}')\n", + "Ezx = sp.symbols(r'E_{zx}')\n", + "Ezy = sp.symbols(r'E_{zy}')\n", + "Ezz = sp.symbols(r'E_{zz}')\n", + "E = Matrix([[Exx],[Exy],[Exz],[Eyx],[Eyy],[Eyz],[Ezx],[Ezy],[Ezz]])\n", + "\n", + "# B fields\n", + "Bxx = sp.symbols(r'B_{xx}')\n", + "Bxy = sp.symbols(r'B_{xy}')\n", + "Bxz = sp.symbols(r'B_{xz}')\n", + "Byx = sp.symbols(r'B_{yx}')\n", + "Byy = sp.symbols(r'B_{yy}')\n", + "Byz = sp.symbols(r'B_{yz}')\n", + "Bzx = sp.symbols(r'B_{zx}')\n", + "Bzy = sp.symbols(r'B_{zy}')\n", + "Bzz = sp.symbols(r'B_{zz}')\n", + "B = Matrix([[Bxx],[Bxy],[Bxz],[Byx],[Byy],[Byz],[Bzx],[Bzy],[Bzz]])\n", + "\n", + "# F fields\n", + "Fx = sp.symbols(r'F_{x}')\n", + "Fy = sp.symbols(r'F_{y}')\n", + "Fz = sp.symbols(r'F_{z}')\n", + "F = Matrix([[Fx],[Fy],[Fz]])\n", + "\n", + "# G fields\n", + "Gx = sp.symbols(r'G_{x}')\n", + "Gy = sp.symbols(r'G_{y}')\n", + "Gz = sp.symbols(r'G_{z}')\n", + "G = Matrix([[Gx],[Gy],[Gz]])\n", + "\n", + "# dE/dt\n", + "dExx_dt = c**2 * I * kx * (Fx + Fy + Fz)\n", + "dExy_dt = c**2 * I * ky * (Bzx + Bzy + Bzz)\n", + "dExz_dt = - c**2 * I * kz * (Byx + Byy + Byz)\n", + "dEyx_dt = - c**2 * I * kx * (Bzx + Bzy + Bzz)\n", + "dEyy_dt = c**2 * I * ky * (Fx + Fy + Fz)\n", + "dEyz_dt = c**2 * I * kz * (Bxx + Bxy + Bxz)\n", + "dEzx_dt = c**2 * I * kx * (Byx + Byy + Byz)\n", + "dEzy_dt = - c**2 * I * ky * (Bxx + Bxy + Bxz)\n", + "dEzz_dt = c**2 * I * kz * (Fx + Fy + Fz)\n", + "dE_dt = Matrix([[dExx_dt],[dExy_dt],[dExz_dt],[dEyx_dt],[dEyy_dt],[dEyz_dt],[dEzx_dt],[dEzy_dt],[dEzz_dt]])\n", + "\n", + "# dB/dt\n", + "dBxx_dt = I * kx * (Gx + Gy + Gz)\n", + "dBxy_dt = - I * ky * (Ezx + Ezy + Ezz)\n", + "dBxz_dt = I * kz * (Eyx + Eyy + Eyz)\n", + "dByx_dt = I * kx * (Ezx + Ezy + Ezz)\n", + "dByy_dt = I * ky * (Gx + Gy + Gz)\n", + "dByz_dt = - I * kz * (Exx + Exy + Exz)\n", + "dBzx_dt = - I * kx * (Eyx + Eyy + Eyz)\n", + "dBzy_dt = I * ky * (Exx + Exy + Exz)\n", + "dBzz_dt = I * kz * (Gx + Gy + Gz)\n", + "dB_dt = Matrix([[dBxx_dt],[dBxy_dt],[dBxz_dt],[dByx_dt],[dByy_dt],[dByz_dt],[dBzx_dt],[dBzy_dt],[dBzz_dt]])\n", + "\n", + "# dF/dt\n", + "dFx_dt = I * kx * (Exx + Exy + Exz)\n", + "dFy_dt = I * ky * (Eyx + Eyy + Eyz)\n", + "dFz_dt = I * kz * (Ezx + Ezy + Ezz)\n", + "dF_dt = Matrix([[dFx_dt],[dFy_dt],[dFz_dt]])\n", + "\n", + "# dG/dt\n", + "dGx_dt = c**2 * I * kx * (Bxx + Bxy + Bxz)\n", + "dGy_dt = c**2 * I * ky * (Byx + Byy + Byz)\n", + "dGz_dt = c**2 * I * kz * (Bzx + Bzy + Bzz)\n", + "dG_dt = Matrix([[dGx_dt],[dGy_dt],[dGz_dt]])\n", + "\n", + "# d2E/dt2\n", + "d2Exx_dt2 = c**2 * I * kx * (dFx_dt + dFy_dt + dFz_dt)\n", + "d2Exy_dt2 = c**2 * I * ky * (dBzx_dt + dBzy_dt + dBzz_dt)\n", + "d2Exz_dt2 = - c**2 * I * kz * (dByx_dt + dByy_dt + dByz_dt)\n", + "d2Eyx_dt2 = - c**2 * I * kx * (dBzx_dt + dBzy_dt + dBzz_dt)\n", + "d2Eyy_dt2 = c**2 * I * ky * (dFx_dt + dFy_dt + dFz_dt)\n", + "d2Eyz_dt2 = c**2 * I * kz * (dBxx_dt + dBxy_dt + dBxz_dt)\n", + "d2Ezx_dt2 = c**2 * I * kx * (dByx_dt + dByy_dt + dByz_dt)\n", + "d2Ezy_dt2 = - c**2 * I * ky * (dBxx_dt + dBxy_dt + dBxz_dt)\n", + "d2Ezz_dt2 = c**2 * I * kz * (dFx_dt + dFy_dt + dFz_dt)\n", + "d2E_dt2 = Matrix([[d2Exx_dt2],[d2Exy_dt2],[d2Exz_dt2],[d2Eyx_dt2],[d2Eyy_dt2],[d2Eyz_dt2],[d2Ezx_dt2],[d2Ezy_dt2],[d2Ezz_dt2]])\n", + "\n", + "# d2B/dt2\n", + "d2Bxx_dt2 = I * kx * (dGx_dt + dGy_dt + dGz_dt)\n", + "d2Bxy_dt2 = - I * ky * (dEzx_dt + dEzy_dt + dEzz_dt)\n", + "d2Bxz_dt2 = I * kz * (dEyx_dt + dEyy_dt + dEyz_dt)\n", + "d2Byx_dt2 = I * kx * (dEzx_dt + dEzy_dt + dEzz_dt)\n", + "d2Byy_dt2 = I * ky * (dGx_dt + dGy_dt + dGz_dt)\n", + "d2Byz_dt2 = - I * kz * (dExx_dt + dExy_dt + dExz_dt)\n", + "d2Bzx_dt2 = - I * kx * (dEyx_dt + dEyy_dt + dEyz_dt)\n", + "d2Bzy_dt2 = I * ky * (dExx_dt + dExy_dt + dExz_dt)\n", + "d2Bzz_dt2 = I * kz * (dGx_dt + dGy_dt + dGz_dt)\n", + "d2B_dt2 = Matrix([[d2Bxx_dt2],[d2Bxy_dt2],[d2Bxz_dt2],[d2Byx_dt2],[d2Byy_dt2],[d2Byz_dt2],[d2Bzx_dt2],[d2Bzy_dt2],[d2Bzz_dt2]])\n", + "\n", + "# d2F/dt2\n", + "d2Fx_dt2 = I * kx * (dExx_dt + dExy_dt + dExz_dt)\n", + "d2Fy_dt2 = I * ky * (dEyx_dt + dEyy_dt + dEyz_dt)\n", + "d2Fz_dt2 = I * kz * (dEzx_dt + dEzy_dt + dEzz_dt)\n", + "d2F_dt2 = Matrix([[d2Fx_dt2],[d2Fy_dt2],[d2Fz_dt2]])\n", + "\n", + "# d2G/dt2\n", + "d2Gx_dt2 = c**2 * I * kx * (dBxx_dt + dBxy_dt + dBxz_dt)\n", + "d2Gy_dt2 = c**2 * I * ky * (dByx_dt + dByy_dt + dByz_dt)\n", + "d2Gz_dt2 = c**2 * I * kz * (dBzx_dt + dBzy_dt + dBzz_dt)\n", + "d2G_dt2 = Matrix([[d2Gx_dt2],[d2Gy_dt2],[d2Gz_dt2]])\n", + "\n", + "for i in range(dd):\n", + " d2E_dt2[i] = sp.expand(d2E_dt2[i])\n", + "\n", + "for i in range(dd):\n", + " d2B_dt2[i] = sp.expand(d2B_dt2[i])\n", + "\n", + "for i in range(3):\n", + " d2F_dt2[i] = sp.expand(d2F_dt2[i])\n", + " \n", + "for i in range(3):\n", + " d2G_dt2[i] = sp.expand(d2G_dt2[i])\n", + " \n", + "# Extended array for E and G\n", + "EG = zeros(DD,1)\n", + "for i in range(dd):\n", + " EG[i] = E[i]\n", + "for i in range(dd,DD):\n", + " EG[i] = G[i-dd]\n", + "\n", + "# dEG/dt\n", + "dEG_dt = zeros(DD,1)\n", + "for i in range(dd):\n", + " dEG_dt[i] = dE_dt[i]\n", + "for i in range(dd,DD):\n", + " dEG_dt[i] = dG_dt[i-dd]\n", + " \n", + "# d2EG/dt2\n", + "d2EG_dt2 = zeros(DD,1)\n", + "for i in range(dd):\n", + " d2EG_dt2[i] = d2E_dt2[i]\n", + "for i in range(dd,DD):\n", + " d2EG_dt2[i] = d2G_dt2[i-dd]\n", + " \n", + "# Extended array for B and F\n", + "BF = zeros(DD,1)\n", + "for i in range(dd):\n", + " BF[i] = B[i]\n", + "for i in range(dd,DD):\n", + " BF[i] = F[i-dd]\n", + "\n", + "# dBF/dt\n", + "dBF_dt = zeros(DD,1)\n", + "for i in range(dd):\n", + " dBF_dt[i] = dB_dt[i]\n", + "for i in range(dd,DD):\n", + " dBF_dt[i] = dF_dt[i-dd]\n", + "\n", + "# d2BF/dt2\n", + "d2BF_dt2 = zeros(DD,1)\n", + "for i in range(dd):\n", + " d2BF_dt2[i] = d2B_dt2[i]\n", + "for i in range(dd,DD):\n", + " d2BF_dt2[i] = d2F_dt2[i-dd]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Solve second-order ODEs for $\\boldsymbol{E}$, $\\boldsymbol{B}$, $F$ and $G$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "print(r'Solve equations for E and G:')\n", + "EG_t, EG_new = evolve(EG, dEG_dt, d2EG_dt2)\n", + "\n", + "print(r'Solve equations for B and F:')\n", + "BF_t, BF_new = evolve(BF, dBF_dt, d2BF_dt2)\n", + "\n", + "# Check correctness by taking *first* derivative\n", + "# and comparing with initial right-hand side at time tn\n", + "# E,G\n", + "diff = EG_t.diff(t).subs(t, tn).subs(om, c*knorm).expand() - dEG_dt\n", + "diff.simplify()\n", + "if diff != zeros(DD,1):\n", + " print('Integration in time failed')\n", + " display(diff)\n", + "# B,F\n", + "diff = BF_t.diff(t).subs(t, tn).subs(om, c*knorm).expand() - dBF_dt\n", + "diff.simplify()\n", + "if diff != zeros(DD,1):\n", + " print('Integration in time failed')\n", + " display(diff)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Coefficients of PSATD equations in PML:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Code generation\n", + "from sympy.codegen.ast import Assignment\n", + "\n", + "# 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11\n", + "# EG: Exx, Exy, Exz, Eyx, Eyy, Eyz, Ezx, Ezy, Ezz, Gx, Gy, Gz\n", + "# BF: Bxx, Bxy, Bxz, Byx, Byy, Byz, Bzx, Bzy, Bzz, Fx, Fy, Fz\n", + "\n", + "# Select update equation (left hand side)\n", + "X_new = BF_new[0]\n", + "\n", + "# Extract individual terms (right hand side)\n", + "for i in range(DD):\n", + " X = EG[i]\n", + " C = X_new.coeff(X, 1).simplify()\n", + " print(r'Coefficient multiplying ' + str(X))\n", + " display(C)\n", + " #print(ccode(Assignment(sp.symbols(r'LHS'), C)))\n", + "for i in range(DD):\n", + " X = BF[i]\n", + " C = X_new.coeff(X, 1).simplify()\n", + " print(r'Coefficient multiplying ' + str(X))\n", + " display(C)\n", + " #print(ccode(Assignment(sp.symbols(r'LHS'), C)))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh b/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh new file mode 100644 index 00000000000..f62c6018cd5 --- /dev/null +++ b/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# +# Copyright 2023 The WarpX Community +# +# This file is part of WarpX. +# +# Author: Axel Huebl +# License: BSD-3-Clause-LBNL + +# Exit on first error encountered ############################################# +# +set -eu -o pipefail + + +# Check: ###################################################################### +# +# Was lassen_v100_warpx.profile sourced and configured correctly? +if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lassen_v100_warpx_toss3.profile file! Please edit its line 2 to continue!"; exit 1; fi + + +# Remove old dependencies ##################################################### +# +SRC_DIR="/usr/workspace/${USER}/lassen-toss3/src" +SW_DIR="/usr/workspace/${USER}/lassen-toss3/gpu" +rm -rf ${SW_DIR} +mkdir -p ${SW_DIR} +mkdir -p ${SRC_DIR} + +# remove common user mistakes in python, located in .local instead of a venv +python3 -m pip uninstall -qq -y pywarpx +python3 -m pip uninstall -qq -y warpx +python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true + + +# General extra dependencies ################################################## +# + +# tmpfs build directory: avoids issues often seen with $HOME and is faster +build_dir=$(mktemp -d) + +# c-blosc (I/O compression) +if [ -d ${SRC_DIR}/c-blosc ] +then + cd ${SRC_DIR}/c-blosc + git fetch --prune + git checkout v1.21.1 + cd - +else + git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git ${SRC_DIR}/c-blosc +fi +cmake -S ${SRC_DIR}/c-blosc -B ${build_dir}/c-blosc-lassen-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1 +cmake --build ${build_dir}/c-blosc-lassen-build --target install --parallel 10 + +# HDF5 +if [ -d ${SRC_DIR}/hdf5 ] +then + cd ${SRC_DIR}/hdf5 + git fetch --prune + git checkout hdf5-1_14_1-2 + cd - +else + git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git ${SRC_DIR}/hdf5 +fi +cmake -S ${SRC_DIR}/hdf5 -B ${build_dir}/hdf5-lassen-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2 +cmake --build ${build_dir}/hdf5-lassen-build --target install --parallel 10 + +# ADIOS2 +if [ -d ${SRC_DIR}/adios2 ] +then + cd ${SRC_DIR}/adios2 + git fetch --prune + git checkout v2.8.3 + cd - +else + git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git ${SRC_DIR}/adios2 +fi +cmake -S ${SRC_DIR}/adios2 -B ${build_dir}/adios2-lassen-build -DBUILD_TESTING=OFF -DADIOS2_BUILD_EXAMPLES=OFF -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_SST=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3 +cmake --build ${build_dir}/adios2-lassen-build --target install -j 10 + +# BLAS++ (for PSATD+RZ) +if [ -d ${SRC_DIR}/blaspp ] +then + cd ${SRC_DIR}/blaspp + git fetch --prune + git checkout master + git pull + cd - +else + git clone https://github.com/icl-utk-edu/blaspp.git ${SRC_DIR}/blaspp +fi +cmake -S ${SRC_DIR}/blaspp -B ${build_dir}/blaspp-lassen-build -Duse_openmp=ON -Dgpu_backend=cuda -Duse_cmake_find_blas=ON -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master +cmake --build ${build_dir}/blaspp-lassen-build --target install --parallel 10 + +# LAPACK++ (for PSATD+RZ) +if [ -d ${SRC_DIR}/lapackpp ] +then + cd ${SRC_DIR}/lapackpp + git fetch --prune + git checkout master + git pull + cd - +else + git clone https://github.com/icl-utk-edu/lapackpp.git ${SRC_DIR}/lapackpp +fi +CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S ${SRC_DIR}/lapackpp -B ${build_dir}/lapackpp-lassen-build -Duse_cmake_find_lapack=ON -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master -DLAPACK_LIBRARIES=/usr/lib64/liblapack.so +cmake --build ${build_dir}/lapackpp-lassen-build --target install --parallel 10 + + +# Python ###################################################################### +# +python3 -m pip install --upgrade --user virtualenv +rm -rf ${SW_DIR}/venvs/warpx-lassen-toss3 +python3 -m venv ${SW_DIR}/venvs/warpx-lassen-toss3 +source ${SW_DIR}/venvs/warpx-lassen-toss3/bin/activate +python3 -m pip install --upgrade pip +python3 -m pip cache purge +python3 -m pip install --upgrade wheel +python3 -m pip install --upgrade cython +python3 -m pip install --upgrade numpy +python3 -m pip install --upgrade pandas +CMAKE_PREFIX_PATH=/usr/lib64:${CMAKE_PREFIX_PATH} python3 -m pip install --upgrade -Ccompile-args="-j10" -Csetup-args=-Dblas=BLAS -Csetup-args=-Dlapack=BLAS scipy +python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py +python3 -m pip install --upgrade openpmd-api +MPLLOCALFREETYPE=1 python3 -m pip install --upgrade matplotlib==3.2.2 # does not try to build freetype itself +echo "matplotlib==3.2.2" > ${build_dir}/constraints.txt +python3 -m pip install --upgrade -c ${build_dir}/constraints.txt yt + +# install or update WarpX dependencies such as picmistandard +python3 -m pip install --upgrade -r ${SRC_DIR}/warpx/requirements.txt + +# for ML dependencies, see install_v100_ml.sh + + +# remove build temporary directory +rm -rf ${build_dir} diff --git a/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example b/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example new file mode 100644 index 00000000000..979c27989fa --- /dev/null +++ b/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example @@ -0,0 +1,54 @@ +# please set your project account +#export proj="" # edit this and comment in + +# required dependencies +module load cmake/3.23.1 +module load gcc/11.2.1 +module load cuda/12.0.0 + +# optional: for QED lookup table generation support +module load boost/1.70.0 + +# optional: for openPMD support +SRC_DIR="/usr/workspace/${USER}/lassen-toss3/src" +SW_DIR="/usr/workspace/${USER}/lassen-toss3/gpu" +export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=${SW_DIR}/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.8.3/lib64:$LD_LIBRARY_PATH + +# optional: for PSATD in RZ geometry support +export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-master:$CMAKE_PREFIX_PATH +export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-master:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=${SW_DIR}/blaspp-master/lib64:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-master/lib64:$LD_LIBRARY_PATH + +# optional: for Python bindings +module load python/3.8.2 + +if [ -d "${SW_DIR}/venvs/warpx-lassen-toss3" ] +then + source ${SW_DIR}/venvs/warpx-lassen-toss3/bin/activate +fi + +# optional: an alias to request an interactive node for two hours +alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash" +# an alias to run a command on a batch node for up to 30min +# usage: runNode +alias runNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -I" + +# fix system defaults: do not escape $ with a \ on tab completion +shopt -s direxpand + +# optimize CUDA compilation for V100 +export AMREX_CUDA_ARCH=7.0 +export CUDAARCHS=70 + +# compiler environment hints +export CC=$(which gcc) +export CXX=$(which g++) +export FC=$(which gfortran) +export CUDACXX=$(which nvcc) +export CUDAHOSTCXX=${CXX} diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake index 3e0f044f020..a4d1f0fed5c 100644 --- a/cmake/dependencies/AMReX.cmake +++ b/cmake/dependencies/AMReX.cmake @@ -243,7 +243,7 @@ macro(find_amrex) endif() set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION}) - find_package(AMReX 23.09 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS) + find_package(AMReX 23.10 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS) message(STATUS "AMReX: Found version '${AMReX_VERSION}'") endif() endmacro() @@ -257,7 +257,7 @@ set(WarpX_amrex_src "" set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git" CACHE STRING "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)") -set(WarpX_amrex_branch "2e99628138df3b5b0ecf50b0c1201d5547f821a0" +set(WarpX_amrex_branch "23.10" CACHE STRING "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)") diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake index a36ef59d1f9..2ff354785da 100644 --- a/cmake/dependencies/pyAMReX.cmake +++ b/cmake/dependencies/pyAMReX.cmake @@ -64,7 +64,7 @@ function(find_pyamrex) endif() elseif(NOT WarpX_pyamrex_internal) # TODO: MPI control - find_package(pyAMReX 23.07 CONFIG REQUIRED) + find_package(pyAMReX 23.10 CONFIG REQUIRED) message(STATUS "pyAMReX: Found version '${pyamrex_VERSION}'") endif() endfunction() @@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON) set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git" CACHE STRING "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)") -set(WarpX_pyamrex_branch "development" +set(WarpX_pyamrex_branch "23.10" CACHE STRING "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)") diff --git a/run_test.sh b/run_test.sh index 81237fb6971..1c948e48713 100755 --- a/run_test.sh +++ b/run_test.sh @@ -71,7 +71,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt # Clone AMReX and warpx-data git clone https://github.com/AMReX-Codes/amrex.git -cd amrex && git checkout --detach 2e99628138df3b5b0ecf50b0c1201d5547f821a0 && cd - +cd amrex && git checkout --detach 23.10 && cd - # warpx-data contains various required data sets git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git # openPMD-example-datasets contains various required data sets diff --git a/setup.py b/setup.py index 91c8f1ac828..4636d6995f6 100644 --- a/setup.py +++ b/setup.py @@ -283,7 +283,7 @@ def build_extension(self, ext): setup( name='pywarpx', # note PEP-440 syntax: x.y.zaN but x.y.z.devN - version = '23.09', + version = '23.10', packages = ['pywarpx'], package_dir = {'pywarpx': 'Python/pywarpx'}, author='Jean-Luc Vay, David P. Grote, Maxence Thévenet, Rémi Lehe, Andrew Myers, Weiqun Zhang, Axel Huebl, et al.',