From c894651465d192b6594ab35ea5334406405a55fe Mon Sep 17 00:00:00 2001
From: Edoardo Zoni <59625522+EZoni@users.noreply.github.com>
Date: Wed, 4 Oct 2023 10:30:42 -0700
Subject: [PATCH 01/12] Merge nodal synchronization with `FillBoundary` calls
 in PML (#3534)

* Add nodal synchronization option to PML functions

* Add missing #include directive

* Remove calls to `NodalSyncPML`

* Same changes for PML classes in RZ geometry

* Remove unused functions

* Update checksums of `divb_cleaning_3d`
---
 .../benchmarks_json/divb_cleaning_3d.json     | 18 ++---
 Source/BoundaryConditions/PML.H               | 14 ++--
 Source/BoundaryConditions/PML.cpp             | 61 +++------------
 Source/BoundaryConditions/PML_RZ.H            |  5 +-
 Source/BoundaryConditions/PML_RZ.cpp          |  8 +-
 Source/Evolve/WarpXEvolve.cpp                 | 21 +-----
 Source/Parallelization/WarpXComm.cpp          | 75 ++-----------------
 Source/WarpX.H                                | 15 ----
 Source/ablastr/utils/Communication.H          |  5 +-
 Source/ablastr/utils/Communication.cpp        |  8 +-
 10 files changed, 52 insertions(+), 178 deletions(-)
diff --git a/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json b/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json
index cbb43a81004..17d56dcc906 100644
--- a/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json
+++ b/Regression/Checksum/benchmarks_json/divb_cleaning_3d.json
@@ -1,13 +1,13 @@
 {
   "lev=0": {
-    "Bx": 30128649.582633037,
-    "By": 30128649.58263304,
-    "Bz": 30128649.582633033,
-    "Ex": 137776481658696.53,
-    "Ey": 137776481658695.72,
-    "Ez": 137776481658736.47,
-    "G": 7382627392167406.0,
-    "divB": 6914882882637.787,
-    "divE": 60880563.38401385
+    "Bx": 30110529.73244452,
+    "By": 30110529.73244452,
+    "Bz": 30110529.732444517,
+    "Ex": 137103615680453.66,
+    "Ey": 137103615680454.5,
+    "Ez": 137103615680494.48,
+    "G": 7413121805692223.0,
+    "divB": 6944252335584.075,
+    "divE": 60882624.59445304
   }
 }
\ No newline at end of file
diff --git a/Source/BoundaryConditions/PML.H b/Source/BoundaryConditions/PML.H
index a925cd6c9ef..4be1be146cc 100644
--- a/Source/BoundaryConditions/PML.H
+++ b/Source/BoundaryConditions/PML.H
@@ -29,6 +29,7 @@
 
 #include <array>
 #include <memory>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -187,15 +188,10 @@ public:
     void ExchangeG (amrex::MultiFab* G_fp, amrex::MultiFab* G_cp, int do_pml_in_domain);
     void ExchangeG (PatchType patch_type, amrex::MultiFab* Gp, int do_pml_in_domain);
 
-    void FillBoundary ();
-    void FillBoundaryE ();
-    void FillBoundaryB ();
-    void FillBoundaryF ();
-    void FillBoundaryG ();
-    void FillBoundaryE (PatchType patch_type);
-    void FillBoundaryB (PatchType patch_type);
-    void FillBoundaryF (PatchType patch_type);
-    void FillBoundaryG (PatchType patch_type);
+    void FillBoundaryE (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
+    void FillBoundaryB (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
+    void FillBoundaryF (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
+    void FillBoundaryG (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
 
     bool ok () const { return m_ok; }
 
diff --git a/Source/BoundaryConditions/PML.cpp b/Source/BoundaryConditions/PML.cpp
index 5fc3e2dd3dc..811d1aa469d 100644
--- a/Source/BoundaryConditions/PML.cpp
+++ b/Source/BoundaryConditions/PML.cpp
@@ -1229,103 +1229,66 @@ PML::CopyToPML (MultiFab& pml, MultiFab& reg, const Geometry& geom)
 }
 
 void
-PML::FillBoundary ()
-{
-    FillBoundaryE();
-    FillBoundaryB();
-    FillBoundaryF();
-    FillBoundaryG();
-}
-
-void
-PML::FillBoundaryE ()
-{
-    FillBoundaryE(PatchType::fine);
-    FillBoundaryE(PatchType::coarse);
-}
-
-void
-PML::FillBoundaryE (PatchType patch_type)
+PML::FillBoundaryE (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_E_fp[0] && pml_E_fp[0]->nGrowVect().max() > 0)
     {
         const auto& period = m_geom->periodicity();
         const Vector<MultiFab*> mf{pml_E_fp[0].get(),pml_E_fp[1].get(),pml_E_fp[2].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
     else if (patch_type == PatchType::coarse && pml_E_cp[0] && pml_E_cp[0]->nGrowVect().max() > 0)
     {
         const auto& period = m_cgeom->periodicity();
         const Vector<MultiFab*> mf{pml_E_cp[0].get(),pml_E_cp[1].get(),pml_E_cp[2].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
 void
-PML::FillBoundaryB ()
-{
-    FillBoundaryB(PatchType::fine);
-    FillBoundaryB(PatchType::coarse);
-}
-
-void
-PML::FillBoundaryB (PatchType patch_type)
+PML::FillBoundaryB (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_B_fp[0])
     {
         const auto& period = m_geom->periodicity();
         const Vector<MultiFab*> mf{pml_B_fp[0].get(),pml_B_fp[1].get(),pml_B_fp[2].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
     else if (patch_type == PatchType::coarse && pml_B_cp[0])
     {
         const auto& period = m_cgeom->periodicity();
         const Vector<MultiFab*> mf{pml_B_cp[0].get(),pml_B_cp[1].get(),pml_B_cp[2].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
 void
-PML::FillBoundaryF ()
-{
-    FillBoundaryF(PatchType::fine);
-    FillBoundaryF(PatchType::coarse);
-}
-
-void
-PML::FillBoundaryF (PatchType patch_type)
+PML::FillBoundaryF (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_F_fp && pml_F_fp->nGrowVect().max() > 0)
     {
         const auto& period = m_geom->periodicity();
-        ablastr::utils::communication::FillBoundary(*pml_F_fp, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(*pml_F_fp, WarpX::do_single_precision_comms, period, nodal_sync);
     }
     else if (patch_type == PatchType::coarse && pml_F_cp && pml_F_cp->nGrowVect().max() > 0)
     {
         const auto& period = m_cgeom->periodicity();
-        ablastr::utils::communication::FillBoundary(*pml_F_cp, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(*pml_F_cp, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
 void
-PML::FillBoundaryG ()
-{
-    FillBoundaryG(PatchType::fine);
-    FillBoundaryG(PatchType::coarse);
-}
-
-void
-PML::FillBoundaryG (PatchType patch_type)
+PML::FillBoundaryG (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_G_fp && pml_G_fp->nGrowVect().max() > 0)
     {
         const auto& period = m_geom->periodicity();
-        ablastr::utils::communication::FillBoundary(*pml_G_fp, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(*pml_G_fp, WarpX::do_single_precision_comms, period, nodal_sync);
     }
     else if (patch_type == PatchType::coarse && pml_G_cp && pml_G_cp->nGrowVect().max() > 0)
     {
         const auto& period = m_cgeom->periodicity();
-        ablastr::utils::communication::FillBoundary(*pml_G_cp, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(*pml_G_cp, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
diff --git a/Source/BoundaryConditions/PML_RZ.H b/Source/BoundaryConditions/PML_RZ.H
index 72c901bf480..ac4b6ff2c4d 100644
--- a/Source/BoundaryConditions/PML_RZ.H
+++ b/Source/BoundaryConditions/PML_RZ.H
@@ -22,6 +22,7 @@
 #include <AMReX_BaseFwd.H>
 
 #include <array>
+#include <optional>
 #include <string>
 
 enum struct PatchType : int;
@@ -45,8 +46,8 @@ public:
 
     void FillBoundaryE ();
     void FillBoundaryB ();
-    void FillBoundaryE (PatchType patch_type);
-    void FillBoundaryB (PatchType patch_type);
+    void FillBoundaryE (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
+    void FillBoundaryB (PatchType patch_type, std::optional<bool> nodal_sync=std::nullopt);
 
     void CheckPoint (const std::string& dir) const;
     void Restart (const std::string& dir);
diff --git a/Source/BoundaryConditions/PML_RZ.cpp b/Source/BoundaryConditions/PML_RZ.cpp
index 98ed12e7d67..d04c6e53bf8 100644
--- a/Source/BoundaryConditions/PML_RZ.cpp
+++ b/Source/BoundaryConditions/PML_RZ.cpp
@@ -134,13 +134,13 @@ PML_RZ::FillBoundaryE ()
 }
 
 void
-PML_RZ::FillBoundaryE (PatchType patch_type)
+PML_RZ::FillBoundaryE (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_E_fp[0] && pml_E_fp[0]->nGrowVect().max() > 0)
     {
         const amrex::Periodicity& period = m_geom->periodicity();
         const Vector<amrex::MultiFab*> mf{pml_E_fp[0].get(),pml_E_fp[1].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
@@ -151,13 +151,13 @@ PML_RZ::FillBoundaryB ()
 }
 
 void
-PML_RZ::FillBoundaryB (PatchType patch_type)
+PML_RZ::FillBoundaryB (PatchType patch_type, std::optional<bool> nodal_sync)
 {
     if (patch_type == PatchType::fine && pml_B_fp[0])
     {
         const amrex::Periodicity& period = m_geom->periodicity();
         const Vector<amrex::MultiFab*> mf{pml_B_fp[0].get(),pml_B_fp[1].get()};
-        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(mf, WarpX::do_single_precision_comms, period, nodal_sync);
     }
 }
 
diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp
index 467a4268d97..a1b536f6c71 100644
--- a/Source/Evolve/WarpXEvolve.cpp
+++ b/Source/Evolve/WarpXEvolve.cpp
@@ -472,10 +472,6 @@ WarpX::OneStep_nosub (Real cur_time)
             if (WarpX::do_divb_cleaning || WarpX::do_pml_divb_cleaning)
                 FillBoundaryG(guard_cells.ng_alloc_G, WarpX::sync_nodal_points);
         }
-
-        if (do_pml) {
-            NodalSyncPML();
-        }
     } else {
         EvolveF(0.5_rt * dt[0], DtType::FirstHalf);
         EvolveG(0.5_rt * dt[0], DtType::FirstHalf);
@@ -502,11 +498,10 @@ WarpX::OneStep_nosub (Real cur_time)
 
         if (do_pml) {
             DampPML();
-            NodalSyncPML();
-            FillBoundaryE(guard_cells.ng_MovingWindow);
-            FillBoundaryB(guard_cells.ng_MovingWindow);
-            FillBoundaryF(guard_cells.ng_MovingWindow);
-            FillBoundaryG(guard_cells.ng_MovingWindow);
+            FillBoundaryE(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points);
+            FillBoundaryB(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points);
+            FillBoundaryF(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points);
+            FillBoundaryG(guard_cells.ng_MovingWindow, WarpX::sync_nodal_points);
         }
 
         // E and B are up-to-date in the domain, but all guard cells are
@@ -746,11 +741,6 @@ WarpX::OneStep_multiJ (const amrex::Real cur_time)
     if (WarpX::do_divb_cleaning || WarpX::do_pml_divb_cleaning)
         FillBoundaryG(guard_cells.ng_alloc_G, WarpX::sync_nodal_points);
 
-    // Synchronize fields on nodal points in PML
-    if (do_pml)
-    {
-        NodalSyncPML();
-    }
 #else
     amrex::ignore_unused(cur_time);
     WARPX_ABORT_WITH_MESSAGE(
@@ -927,9 +917,6 @@ WarpX::OneStep_sub1 (Real curtime)
     if ( safe_guard_cells )
         FillBoundaryB(coarse_lev, PatchType::fine, guard_cells.ng_FieldSolver,
                       WarpX::sync_nodal_points);
-
-    // Synchronize nodal points at the end of the time step
-    if (do_pml) NodalSyncPML();
 }
 
 void
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 9b7f16478fc..8c87cc9c772 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -561,13 +561,13 @@ WarpX::FillBoundaryE (const int lev, const PatchType patch_type, const amrex::In
                 (patch_type == PatchType::fine) ? pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
 
             pml[lev]->Exchange(mf_pml, mf, patch_type, do_pml_in_domain);
-            pml[lev]->FillBoundaryE(patch_type);
+            pml[lev]->FillBoundaryE(patch_type, nodal_sync);
         }
 
 #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD)
         if (pml_rz[lev])
         {
-            pml_rz[lev]->FillBoundaryE(patch_type);
+            pml_rz[lev]->FillBoundaryE(patch_type, nodal_sync);
         }
 #endif
     }
@@ -618,13 +618,13 @@ WarpX::FillBoundaryB (const int lev, const PatchType patch_type, const amrex::In
                 (patch_type == PatchType::fine) ? pml[lev]->GetB_fp() : pml[lev]->GetB_cp();
 
             pml[lev]->Exchange(mf_pml, mf, patch_type, do_pml_in_domain);
-            pml[lev]->FillBoundaryB(patch_type);
+            pml[lev]->FillBoundaryB(patch_type, nodal_sync);
         }
 
 #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD)
         if (pml_rz[lev])
         {
-            pml_rz[lev]->FillBoundaryB(patch_type);
+            pml_rz[lev]->FillBoundaryB(patch_type, nodal_sync);
         }
 #endif
     }
@@ -761,7 +761,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng, std::optional<b
         if (do_pml && pml[lev] && pml[lev]->ok())
         {
             if (F_fp[lev]) pml[lev]->ExchangeF(patch_type, F_fp[lev].get(), do_pml_in_domain);
-            pml[lev]->FillBoundaryF(patch_type);
+            pml[lev]->FillBoundaryF(patch_type, nodal_sync);
         }
 
         if (F_fp[lev])
@@ -776,7 +776,7 @@ WarpX::FillBoundaryF (int lev, PatchType patch_type, IntVect ng, std::optional<b
         if (do_pml && pml[lev] && pml[lev]->ok())
         {
             if (F_cp[lev]) pml[lev]->ExchangeF(patch_type, F_cp[lev].get(), do_pml_in_domain);
-            pml[lev]->FillBoundaryF(patch_type);
+            pml[lev]->FillBoundaryF(patch_type, nodal_sync);
         }
 
         if (F_cp[lev])
@@ -805,7 +805,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng, std::optio
         if (do_pml && pml[lev] && pml[lev]->ok())
         {
             if (G_fp[lev]) pml[lev]->ExchangeG(patch_type, G_fp[lev].get(), do_pml_in_domain);
-            pml[lev]->FillBoundaryG(patch_type);
+            pml[lev]->FillBoundaryG(patch_type, nodal_sync);
         }
 
         if (G_fp[lev])
@@ -820,7 +820,7 @@ void WarpX::FillBoundaryG (int lev, PatchType patch_type, IntVect ng, std::optio
         if (do_pml && pml[lev] && pml[lev]->ok())
         {
             if (G_cp[lev]) pml[lev]->ExchangeG(patch_type, G_cp[lev].get(), do_pml_in_domain);
-            pml[lev]->FillBoundaryG(patch_type);
+            pml[lev]->FillBoundaryG(patch_type, nodal_sync);
         }
 
         if (G_cp[lev])
@@ -1473,62 +1473,3 @@ void WarpX::NodalSyncRho (
         ablastr::utils::communication::OverrideSync(rhoc, WarpX::do_single_precision_comms, cperiod);
     }
 }
-
-void WarpX::NodalSyncPML ()
-{
-    for (int lev = 0; lev <= finest_level; lev++) {
-        NodalSyncPML(lev);
-    }
-}
-
-void WarpX::NodalSyncPML (int lev)
-{
-    NodalSyncPML(lev, PatchType::fine);
-    if (lev > 0) NodalSyncPML(lev, PatchType::coarse);
-}
-
-void WarpX::NodalSyncPML (int lev, PatchType patch_type)
-{
-    if (pml[lev] && pml[lev]->ok())
-    {
-        const std::array<amrex::MultiFab*,3>& pml_E = (patch_type == PatchType::fine) ?
-                                                      pml[lev]->GetE_fp() : pml[lev]->GetE_cp();
-        const std::array<amrex::MultiFab*,3>& pml_B = (patch_type == PatchType::fine) ?
-                                                      pml[lev]->GetB_fp() : pml[lev]->GetB_cp();
-        amrex::MultiFab* const pml_F = (patch_type == PatchType::fine) ? pml[lev]->GetF_fp() : pml[lev]->GetF_cp();
-        amrex::MultiFab* const pml_G = (patch_type == PatchType::fine) ? pml[lev]->GetG_fp() : pml[lev]->GetG_cp();
-
-        // Always synchronize nodal points
-        const amrex::Periodicity& period = Geom(lev).periodicity();
-        ablastr::utils::communication::OverrideSync(*pml_E[0], WarpX::do_single_precision_comms, period);
-        ablastr::utils::communication::OverrideSync(*pml_E[1], WarpX::do_single_precision_comms, period);
-        ablastr::utils::communication::OverrideSync(*pml_E[2], WarpX::do_single_precision_comms, period);
-        ablastr::utils::communication::OverrideSync(*pml_B[0], WarpX::do_single_precision_comms, period);
-        ablastr::utils::communication::OverrideSync(*pml_B[1], WarpX::do_single_precision_comms, period);
-        ablastr::utils::communication::OverrideSync(*pml_B[2], WarpX::do_single_precision_comms, period);
-        if (pml_F) {
-            ablastr::utils::communication::OverrideSync(*pml_F, WarpX::do_single_precision_comms, period);
-        }
-        if (pml_G) {
-            ablastr::utils::communication::OverrideSync(*pml_G, WarpX::do_single_precision_comms, period);
-        }
-    }
-
-#if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD)
-    if (pml_rz[lev])
-    {
-        // This is not actually needed with RZ PSATD since the
-        // arrays are always cell centered. Keep for now since
-        // it may be useful if the PML is used with FDTD
-        const std::array<amrex::MultiFab*,2> pml_rz_E = pml_rz[lev]->GetE_fp();
-        const std::array<amrex::MultiFab*,2> pml_rz_B = pml_rz[lev]->GetB_fp();
-
-        // Always synchronize nodal points
-        const amrex::Periodicity& period = Geom(lev).periodicity();
-        pml_rz_E[0]->OverrideSync(period);
-        pml_rz_E[1]->OverrideSync(period);
-        pml_rz_B[0]->OverrideSync(period);
-        pml_rz_B[1]->OverrideSync(period);
-    }
-#endif
-}
diff --git a/Source/WarpX.H b/Source/WarpX.H
index 034c321ba14..1bd3f5bf46c 100644
--- a/Source/WarpX.H
+++ b/Source/WarpX.H
@@ -726,21 +726,6 @@ public:
     void CopyJPML ();
     bool isAnyBoundaryPML();
 
-    /**
-     * \brief Synchronize the nodal points of the PML MultiFabs
-     */
-    void NodalSyncPML ();
-
-    /**
-     * \brief Synchronize the nodal points of the PML MultiFabs for given MR level
-     */
-    void NodalSyncPML (int lev);
-
-    /**
-     * \brief Synchronize the nodal points of the PML MultiFabs for given MR level and patch
-     */
-    void NodalSyncPML (int lev, PatchType patch_type);
-
     PML* GetPML (int lev);
 #if (defined WARPX_DIM_RZ) && (defined WARPX_USE_PSATD)
     PML_RZ* GetPML_RZ (int lev);
diff --git a/Source/ablastr/utils/Communication.H b/Source/ablastr/utils/Communication.H
index 9be9fec1e58..85f58ff9f65 100644
--- a/Source/ablastr/utils/Communication.H
+++ b/Source/ablastr/utils/Communication.H
@@ -60,7 +60,8 @@ void ParallelAdd (amrex::MultiFab &dst,
 
 void FillBoundary (amrex::MultiFab &mf,
                    bool do_single_precision_comms,
-                   const amrex::Periodicity &period = amrex::Periodicity::NonPeriodic());
+                   const amrex::Periodicity &period = amrex::Periodicity::NonPeriodic(),
+                   std::optional<bool> nodal_sync=std::nullopt);
 
 void FillBoundary (amrex::MultiFab &mf,
                    amrex::IntVect ng,
@@ -77,7 +78,7 @@ void FillBoundary (amrex::iMultiFab&         mf,
 
 void
 FillBoundary(amrex::Vector<amrex::MultiFab *> const &mf, bool do_single_precision_comms,
-             const amrex::Periodicity &period);
+             const amrex::Periodicity &period, std::optional<bool> nodal_sync=std::nullopt);
 
 void
 SumBoundary (amrex::MultiFab &mf,
diff --git a/Source/ablastr/utils/Communication.cpp b/Source/ablastr/utils/Communication.cpp
index 3ae18ad8d1c..c33e6a1d154 100644
--- a/Source/ablastr/utils/Communication.cpp
+++ b/Source/ablastr/utils/Communication.cpp
@@ -114,18 +114,18 @@ void FillBoundary (amrex::MultiFab &mf,
     }
 }
 
-void FillBoundary (amrex::MultiFab &mf, bool do_single_precision_comms, const amrex::Periodicity &period)
+void FillBoundary (amrex::MultiFab &mf, bool do_single_precision_comms, const amrex::Periodicity &period, std::optional<bool> nodal_sync)
 {
     amrex::IntVect const ng = mf.n_grow;
-    FillBoundary(mf, ng, do_single_precision_comms, period);
+    FillBoundary(mf, ng, do_single_precision_comms, period, nodal_sync);
 }
 
 void
 FillBoundary (amrex::Vector<amrex::MultiFab *> const &mf, bool do_single_precision_comms,
-             const amrex::Periodicity &period)
+             const amrex::Periodicity &period, std::optional<bool> nodal_sync)
 {
     for (auto x : mf) {
-        ablastr::utils::communication::FillBoundary(*x, do_single_precision_comms, period);
+        ablastr::utils::communication::FillBoundary(*x, do_single_precision_comms, period, nodal_sync);
     }
 }
 

From 7d4a5076ed56ab6ac3d490dd1f223f04062a5fff Mon Sep 17 00:00:00 2001
From: Edoardo Zoni <59625522+EZoni@users.noreply.github.com>
Date: Wed, 4 Oct 2023 10:31:20 -0700
Subject: [PATCH 02/12] Add SymPy notebook to derive PSATD equations in PML
 (#4316)

---
 Tools/Algorithms/psatd_pml.ipynb | 607 +++++++++++++++++++++++++++++++
 1 file changed, 607 insertions(+)
 create mode 100644 Tools/Algorithms/psatd_pml.ipynb

diff --git a/Tools/Algorithms/psatd_pml.ipynb b/Tools/Algorithms/psatd_pml.ipynb
new file mode 100644
index 00000000000..216be77fd87
--- /dev/null
+++ b/Tools/Algorithms/psatd_pml.ipynb
@@ -0,0 +1,607 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import inspect\n",
+    "import sympy as sp\n",
+    "from sympy import *\n",
+    "from sympy.solvers.solveset import linsolve\n",
+    "\n",
+    "sp.init_session()\n",
+    "sp.init_printing()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Global variables:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Dimensional parameters\n",
+    "dd = 9\n",
+    "DD = 12\n",
+    "\n",
+    "# Speed of light, time, time step\n",
+    "c = sp.symbols(r'c', real = True, positive = True)\n",
+    "t = sp.symbols(r't', real = True)\n",
+    "tn = sp.symbols(r't_n', real = True)\n",
+    "dt = sp.symbols(r'\\Delta{t}', real = True, positive = True)\n",
+    "\n",
+    "# Components of k vector, omega, norm of k vector\n",
+    "kx = sp.symbols(r'k_x', real = True)\n",
+    "ky = sp.symbols(r'k_y', real = True)\n",
+    "kz = sp.symbols(r'k_z', real = True)\n",
+    "om = sp.symbols(r'omega', real = True, positive = True)\n",
+    "knorm = sp.sqrt(kx**2 + ky**2 + kz**2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Method:\n",
+    "The goal is to solve a system of second-order ordinary differential equations in time of the form $\\boldsymbol{\\ddot{X}} = M \\boldsymbol{X}$, where the $d \\times d$ matrix $M$ has eigenpairs $(\\lambda_i, \\{\\boldsymbol{v}_{i,1}, \\dots, \\boldsymbol{v}_{i,\\mu_i}\\})$, where $\\mu_i$ denotes the algebraic multiplicity of $\\lambda_i$ and $\\sum_i \\mu_i = d$.\n",
+    "Assuming that all eigenvalues are either zero or negative, we can write $\\lambda_i = - \\omega_i^2$, with $\\omega_i = \\sqrt{- \\lambda_i} \\geq 0$.\n",
+    "Then the solution of $\\boldsymbol{\\ddot{X}} = M \\boldsymbol{X}$ reads\n",
+    "$$\n",
+    "\\boldsymbol{X} = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t) + b_{i,j} S(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} \\,,\n",
+    "$$\n",
+    "where $a_{i,j}$ and $b_{i,j}$ are integration constants to be determined by the initial conditions $\\boldsymbol{X}(t_n)$ and $\\boldsymbol{\\dot{X}}(t_n)$, $C(\\omega, t) = \\cos(\\omega \\, (t - t_n))$, and\n",
+    "$$\n",
+    "S(\\omega, t) =\n",
+    "\\begin{cases}\n",
+    "(t - t_n) & \\omega = 0 \\,, \\\\\n",
+    "\\dfrac{\\sin(\\omega \\, (t - t_n))}{\\omega} & \\omega \\neq 0 \\,.\n",
+    "\\end{cases}\n",
+    "$$\n",
+    "We remark that\n",
+    "$$\n",
+    "\\begin{aligned}\n",
+    "& \\boldsymbol{X}(t_n) = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t_n) + b_{i,j} S(\\omega_i, t_n) \\right) \\boldsymbol{v}_{i,j} = \\sum_i \\sum_j^{\\mu_i} a_{i,j} \\boldsymbol{v}_{i,j} \\,, \\\\\n",
+    "& \\boldsymbol{\\dot{X}}(t_n) = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} \\dot{C}(\\omega_i, t_n) + b_{i,j} \\dot{S}(\\omega_i, t_n) \\right) \\boldsymbol{v}_{i,j} = \\sum_i \\sum_j^{\\mu_i} b_{i,j} \\boldsymbol{v}_{i,j} \\,.\n",
+    "\\end{aligned}\n",
+    "$$\n",
+    "In fact, the second time derivative of $\\boldsymbol{X}$ yields\n",
+    "$$\n",
+    "\\begin{split}\n",
+    "\\boldsymbol{\\ddot{X}} & = \\sum_i \\sum_j^{\\mu_i} \\left(a_{i,j} \\ddot{C}(\\omega_i, t) + b_{i,j} \\ddot{S}(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} \\\\\n",
+    "& = \\sum_i (-\\omega_i^2) \\sum_j^{\\mu_i} \\left(a_{i,j} C(\\omega_i, t) + b_{i,j} S(\\omega_i, t) \\right) \\boldsymbol{v}_{i,j} = M \\boldsymbol{X} \\,,\n",
+    "\\end{split}\n",
+    "$$\n",
+    "where we used the fact that, by definition, $M \\boldsymbol{v}_{i,j} = \\lambda_i \\boldsymbol{v}_{i,j} = - \\omega_i^2 \\boldsymbol{v}_{i,j}$ for all $j = 1, \\dots, \\mu_i$."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Auxiliary functions:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def C(omega, t):\n",
+    "    return sp.cos(omega * (t-tn))\n",
+    "\n",
+    "def S(omega, t):\n",
+    "    return (t-tn) if omega == 0. else sp.sin(omega * (t-tn)) / omega\n",
+    "\n",
+    "def Xt(eigenpairs, a, b, t):\n",
+    "    \"\"\"\n",
+    "    Compute X(t) according to the formulas above\n",
+    "    for a given set of eigenpairs and coefficients.\n",
+    "    \"\"\"\n",
+    "    XX = zeros(DD, 1)\n",
+    "    # Index used for the integration constants a_n and b_n\n",
+    "    i = 0\n",
+    "    # Loop over matrix eigenpairs\n",
+    "    for ep in eigenpairs:\n",
+    "        # ep[0] is an eigenvalue and om = sp.sqrt(-ep[0])\n",
+    "        omega = 0. if ep[0] == 0. else om\n",
+    "        # am is the algebraic multiplicity of the eigenvalue\n",
+    "        am = ep[1]\n",
+    "        # vF is the list of all eigenvectors corresponding to the eigenvalue\n",
+    "        vX = ep[2]\n",
+    "        # Loop over algebraic multiplicity of the eigenvalue\n",
+    "        for j in range(am):\n",
+    "            XX += (a[i] * C(omega, t) + b[i] * S(omega, t)) * vX[j]\n",
+    "            i += 1\n",
+    "    return XX\n",
+    "\n",
+    "def evolve(X, dX_dt, d2X_dt2):\n",
+    "    \"\"\"\n",
+    "    Solve ordinary differential equation X'' = M*X.\n",
+    "    \"\"\"\n",
+    "    # Set matrix for given ODE\n",
+    "    MX = zeros(DD)\n",
+    "    for i in range(DD):\n",
+    "        for j in range(DD):\n",
+    "            MX[i,j] = d2X_dt2[i].coeff(X[j], 1)\n",
+    "    #MX /= c**2\n",
+    "\n",
+    "    print()\n",
+    "    print(r'Matrix:')\n",
+    "    display(MX)\n",
+    "\n",
+    "    # Characteristic matrix\n",
+    "    lamda = sp.symbols(r'lamda')\n",
+    "    Id = eye(DD)\n",
+    "    MX_charmat = MX - lamda * Id\n",
+    "\n",
+    "    # Characteristic polynomial\n",
+    "    MX_charpoly = MX_charmat.det()\n",
+    "    MX_charpoly = factor(MX_charpoly.as_expr())\n",
+    "\n",
+    "    print(r'Characteristic polynomial:')\n",
+    "    display(MX_charpoly)\n",
+    "    \n",
+    "    MX_eigenvals = sp.solve(MX_charpoly, lamda)\n",
+    "\n",
+    "    # List of eigenvectors\n",
+    "    MX_eigenvects = []\n",
+    "\n",
+    "    # List of eigenpairs\n",
+    "    MX_eigenpairs = []\n",
+    "    \n",
+    "    # Compute eigenvectors as null spaces\n",
+    "    for l in MX_eigenvals:\n",
+    "\n",
+    "        # M - lamda * Id\n",
+    "        A = MX_charmat.subs(lamda, l)\n",
+    "        A.simplify()\n",
+    "\n",
+    "        print(r'Eigenvalue:')\n",
+    "        display(l)\n",
+    "\n",
+    "        print(r'Characteristic matrix:')\n",
+    "        display(A)\n",
+    "\n",
+    "        # Perform Gaussian elimination (necessary for lamda != 0)\n",
+    "        if (l != 0.):\n",
+    "            print(r'Gaussian elimination:')\n",
+    "            print(r'A[0,:] += A[1,:]')\n",
+    "            A[0,:] += A[1,:]\n",
+    "            print(r'A[0,:] += A[2,:]')\n",
+    "            A[0,:] += A[2,:]\n",
+    "            print(r'Swap A[0,:] and A[11,:]')\n",
+    "            row = A[11,:]\n",
+    "            A[11,:] = A[0,:]\n",
+    "            A[0,:] = row\n",
+    "            print(r'A[3,:] += A[4,:]')\n",
+    "            A[3,:] += A[4,:]\n",
+    "            print(r'A[3,:] += A[5,:]')\n",
+    "            A[3,:] += A[5,:]\n",
+    "            print(r'Swap A[3,:] and A[10,:]')\n",
+    "            row = A[10,:]\n",
+    "            A[10,:] = A[3,:]\n",
+    "            A[3,:] = row\n",
+    "            print(r'A[0,:] += A[3,:]')\n",
+    "            A[0,:] += A[3,:]\n",
+    "            print(r'A[0,:] += A[9,:]')\n",
+    "            A[0,:] += A[9,:]\n",
+    "            print(r'Swap A[0,:] and A[9,:]')\n",
+    "            row = A[9,:]\n",
+    "            A[9,:] = A[0,:]\n",
+    "            A[0,:] = row\n",
+    "            print(r'A[6,:] += A[8,:]')\n",
+    "            A[6,:] += A[8,:]\n",
+    "            print(r'A[6,:] += A[7,:]')\n",
+    "            A[6,:] += A[7,:]\n",
+    "            print(r'Swap A[6,:] and A[8,:]')\n",
+    "            row = A[8,:]\n",
+    "            A[8,:] = A[6,:]\n",
+    "            A[6,:] = row\n",
+    "            print(r'A[6,:] += A[7,:]')\n",
+    "            A[6,:] += A[7,:]\n",
+    "            print(r'A[4,:] += A[5,:]')\n",
+    "            A[4,:] += A[5,:]\n",
+    "            A.simplify()\n",
+    "            display(A)\n",
+    "\n",
+    "        # Compute null space and store eigenvectors\n",
+    "        v = A.nullspace()\n",
+    "        MX_eigenvects.append(v)\n",
+    "\n",
+    "        print(r'Eigenvectors:')\n",
+    "        display(v)\n",
+    "    \n",
+    "        # Store eigenpairs (eigenvalue, algebraic multiplicity, eigenvectors)\n",
+    "        MX_eigenpairs.append((l, len(v), v))\n",
+    "    \n",
+    "        #print(r'Eigenpairs:')\n",
+    "        #display(MX_eigenpairs)\n",
+    "\n",
+    "    # Verify that the eigenpairs satisfy the charcteristic equations\n",
+    "    for ep in MX_eigenpairs:\n",
+    "        for j in range(ep[1]):\n",
+    "            diff = MX * ep[2][j] - ep[0] * ep[2][j]\n",
+    "            diff.simplify()\n",
+    "            if diff != zeros(DD,1):\n",
+    "                print('The charcteristic equation is not verified for some eigenpairs')\n",
+    "                display(diff)\n",
+    "\n",
+    "    # Define integration constants\n",
+    "    a = []\n",
+    "    b = []\n",
+    "    for i in range(DD):\n",
+    "        an = r'a_{:d}'.format(i+1)\n",
+    "        bn = r'b_{:d}'.format(i+1) \n",
+    "        a.append(sp.symbols(an))\n",
+    "        b.append(sp.symbols(bn))\n",
+    "\n",
+    "    # Set equations corresponding to initial conditions\n",
+    "    lhs_a = Xt(MX_eigenpairs, a, b, tn) - X\n",
+    "    lhs_b = Xt(MX_eigenpairs, a, b, t ).diff(t).subs(t, tn) - dX_dt\n",
+    "\n",
+    "    # Compute integration constants from initial conditions\n",
+    "    # (convert list of tuples to list using list comprehension)\n",
+    "    a = list(linsolve(list(lhs_a), a))\n",
+    "    a = [item for el in a for item in el]\n",
+    "    b = list(linsolve(list(lhs_b), b))\n",
+    "    b = [item for el in b for item in el]\n",
+    "\n",
+    "    # Evaluate solution at t = tn + dt\n",
+    "    X_new = Xt(MX_eigenpairs, a, b, tn+dt).expand()\n",
+    "    for d in range(DD):\n",
+    "        for Eij in E:\n",
+    "            X_new[d] = X_new[d].collect(Eij)\n",
+    "        for Bij in B:\n",
+    "            X_new[d] = X_new[d].collect(Bij)\n",
+    "        for Fi in F:\n",
+    "            X_new[d] = X_new[d].collect(Fi)\n",
+    "        for Gi in G:\n",
+    "            X_new[d] = X_new[d].collect(Gi)\n",
+    "\n",
+    "    # Check correctness by taking *second* derivative\n",
+    "    # and comparing with initial right-hand side at time tn\n",
+    "    X_t = Xt(MX_eigenpairs, a, b, t)\n",
+    "    diff = X_t.diff(t).diff(t).subs(t, tn).subs(om, c*knorm).expand() - d2X_dt2\n",
+    "    diff.simplify()\n",
+    "    if diff != zeros(DD,1):\n",
+    "        print('Integration in time failed')\n",
+    "        display(diff)\n",
+    "    \n",
+    "    return X_t, X_new"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### First-order and second-order ODEs for $\\boldsymbol{E}$, $\\boldsymbol{B}$, $F$ and $G$:\n",
+    "Equations for the $\\boldsymbol{E}$ field:\n",
+    "$$\n",
+    "\\begin{alignat*}{3}\n",
+    "&  \\frac{\\partial E_{xx}}{\\partial t} = c^2 i k_x (F_x + F_y  + F_z) \\quad\n",
+    "&& \\frac{\\partial E_{xy}}{\\partial t} = c^2 i k_y (B_{zx} + B_{zy} + B_{zz}) \\quad\n",
+    "&& \\frac{\\partial E_{xz}}{\\partial t} = -c^2 i k_z (B_{yx} + B_{yy} + B_{yz}) \\\\[5pt]\n",
+    "&  \\frac{\\partial E_{yx}}{\\partial t} = -c^2 i k_x (B_{zx} + B_{zy} + B_{zz}) \\quad\n",
+    "&& \\frac{\\partial E_{yy}}{\\partial t} = c^2 i k_y (F_x + F_y + F_z) \\quad\n",
+    "&& \\frac{\\partial E_{yz}}{\\partial t} = c^2 i k_z (B_{xx} + B_{xy} + B_{xz}) \\\\[5pt]\n",
+    "&  \\frac{\\partial E_{zx}}{\\partial t} = c^2 i k_x (B_{yx} + B_{yy} + B_{yz}) \\quad\n",
+    "&& \\frac{\\partial E_{zy}}{\\partial t} = -c^2 i k_y (B_{xx} + B_{xy} + B_{xz})\\quad\n",
+    "&& \\frac{\\partial E_{zz}}{\\partial t} = c^2 i k_z (F_x + F_y + F_z)\n",
+    "\\end{alignat*}\n",
+    "$$\n",
+    "\n",
+    "Equations for the $\\boldsymbol{B}$ field:\n",
+    "$$\n",
+    "\\begin{alignat*}{3}\n",
+    "&  \\frac{\\partial B_{xx}}{\\partial t} = i k_x (G_x + G_y + G_z) \\quad\n",
+    "&& \\frac{\\partial B_{xy}}{\\partial t} = -i k_y (E_{zx} + E_{zy} + E_{zz}) \\quad\n",
+    "&& \\frac{\\partial B_{xz}}{\\partial t} = i k_z (E_{yx} + E_{yy} + E_{yz}) \\\\[5pt]\n",
+    "&  \\frac{\\partial B_{yx}}{\\partial t} = i k_x (E_{zx} + E_{zy} + E_{zz}) \\quad\n",
+    "&& \\frac{\\partial B_{yy}}{\\partial t} = i k_y (G_x + G_y + G_z) \\quad\n",
+    "&& \\frac{\\partial B_{yz}}{\\partial t} = -i k_z (E_{xx} + E_{xy} + E_{xz}) \\\\[5pt]\n",
+    "&  \\frac{\\partial B_{zx}}{\\partial t} = -i k_x (E_{yx} + E_{yy} + E_{yz}) \\quad\n",
+    "&& \\frac{\\partial B_{zy}}{\\partial t} = i k_y (E_{xx} + E_{xy} + E_{xz}) \\quad\n",
+    "&& \\frac{\\partial B_{zz}}{\\partial t} = i k_z (G_x + G_y + G_z)\n",
+    "\\end{alignat*}\n",
+    "$$\n",
+    "\n",
+    "Equations for the $F$ field:\n",
+    "$$\n",
+    "\\frac{\\partial F_x}{\\partial t} = i k_x (E_{xx} + E_{xy} + E_{xz}) \\quad\n",
+    "\\frac{\\partial F_y}{\\partial t} = i k_y (E_{yx} + E_{yy} + E_{yz}) \\quad\n",
+    "\\frac{\\partial F_z}{\\partial t} = i k_z (E_{zx} + E_{zy} + E_{zz})\n",
+    "$$\n",
+    "\n",
+    "Equations for the $G$ field:\n",
+    "$$\n",
+    "\\frac{\\partial G_x}{\\partial t} = c^2 i k_x (B_{xx} + B_{xy} + B_{xz}) \\quad\n",
+    "\\frac{\\partial G_y}{\\partial t} = c^2 i k_y (B_{yx} + B_{yy} + B_{yz}) \\quad\n",
+    "\\frac{\\partial G_z}{\\partial t} = c^2 i k_z (B_{zx} + B_{zy} + B_{zz})\n",
+    "$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# indices  0     1     2     3     4     5     6     7     8\n",
+    "labels = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz']\n",
+    "\n",
+    "# E fields\n",
+    "Exx = sp.symbols(r'E_{xx}')\n",
+    "Exy = sp.symbols(r'E_{xy}')\n",
+    "Exz = sp.symbols(r'E_{xz}')\n",
+    "Eyx = sp.symbols(r'E_{yx}')\n",
+    "Eyy = sp.symbols(r'E_{yy}')\n",
+    "Eyz = sp.symbols(r'E_{yz}')\n",
+    "Ezx = sp.symbols(r'E_{zx}')\n",
+    "Ezy = sp.symbols(r'E_{zy}')\n",
+    "Ezz = sp.symbols(r'E_{zz}')\n",
+    "E = Matrix([[Exx],[Exy],[Exz],[Eyx],[Eyy],[Eyz],[Ezx],[Ezy],[Ezz]])\n",
+    "\n",
+    "# B fields\n",
+    "Bxx = sp.symbols(r'B_{xx}')\n",
+    "Bxy = sp.symbols(r'B_{xy}')\n",
+    "Bxz = sp.symbols(r'B_{xz}')\n",
+    "Byx = sp.symbols(r'B_{yx}')\n",
+    "Byy = sp.symbols(r'B_{yy}')\n",
+    "Byz = sp.symbols(r'B_{yz}')\n",
+    "Bzx = sp.symbols(r'B_{zx}')\n",
+    "Bzy = sp.symbols(r'B_{zy}')\n",
+    "Bzz = sp.symbols(r'B_{zz}')\n",
+    "B = Matrix([[Bxx],[Bxy],[Bxz],[Byx],[Byy],[Byz],[Bzx],[Bzy],[Bzz]])\n",
+    "\n",
+    "# F fields\n",
+    "Fx = sp.symbols(r'F_{x}')\n",
+    "Fy = sp.symbols(r'F_{y}')\n",
+    "Fz = sp.symbols(r'F_{z}')\n",
+    "F = Matrix([[Fx],[Fy],[Fz]])\n",
+    "\n",
+    "# G fields\n",
+    "Gx = sp.symbols(r'G_{x}')\n",
+    "Gy = sp.symbols(r'G_{y}')\n",
+    "Gz = sp.symbols(r'G_{z}')\n",
+    "G = Matrix([[Gx],[Gy],[Gz]])\n",
+    "\n",
+    "# dE/dt\n",
+    "dExx_dt =   c**2 * I * kx * (Fx  + Fy  + Fz)\n",
+    "dExy_dt =   c**2 * I * ky * (Bzx + Bzy + Bzz)\n",
+    "dExz_dt = - c**2 * I * kz * (Byx + Byy + Byz)\n",
+    "dEyx_dt = - c**2 * I * kx * (Bzx + Bzy + Bzz)\n",
+    "dEyy_dt =   c**2 * I * ky * (Fx  + Fy  + Fz)\n",
+    "dEyz_dt =   c**2 * I * kz * (Bxx + Bxy + Bxz)\n",
+    "dEzx_dt =   c**2 * I * kx * (Byx + Byy + Byz)\n",
+    "dEzy_dt = - c**2 * I * ky * (Bxx + Bxy + Bxz)\n",
+    "dEzz_dt =   c**2 * I * kz * (Fx  + Fy  + Fz)\n",
+    "dE_dt = Matrix([[dExx_dt],[dExy_dt],[dExz_dt],[dEyx_dt],[dEyy_dt],[dEyz_dt],[dEzx_dt],[dEzy_dt],[dEzz_dt]])\n",
+    "\n",
+    "# dB/dt\n",
+    "dBxx_dt =   I * kx * (Gx  + Gy  + Gz)\n",
+    "dBxy_dt = - I * ky * (Ezx + Ezy + Ezz)\n",
+    "dBxz_dt =   I * kz * (Eyx + Eyy + Eyz)\n",
+    "dByx_dt =   I * kx * (Ezx + Ezy + Ezz)\n",
+    "dByy_dt =   I * ky * (Gx  + Gy  + Gz)\n",
+    "dByz_dt = - I * kz * (Exx + Exy + Exz)\n",
+    "dBzx_dt = - I * kx * (Eyx + Eyy + Eyz)\n",
+    "dBzy_dt =   I * ky * (Exx + Exy + Exz)\n",
+    "dBzz_dt =   I * kz * (Gx  + Gy  + Gz)\n",
+    "dB_dt = Matrix([[dBxx_dt],[dBxy_dt],[dBxz_dt],[dByx_dt],[dByy_dt],[dByz_dt],[dBzx_dt],[dBzy_dt],[dBzz_dt]])\n",
+    "\n",
+    "# dF/dt\n",
+    "dFx_dt = I * kx * (Exx + Exy + Exz)\n",
+    "dFy_dt = I * ky * (Eyx + Eyy + Eyz)\n",
+    "dFz_dt = I * kz * (Ezx + Ezy + Ezz)\n",
+    "dF_dt = Matrix([[dFx_dt],[dFy_dt],[dFz_dt]])\n",
+    "\n",
+    "# dG/dt\n",
+    "dGx_dt = c**2 * I * kx * (Bxx + Bxy + Bxz)\n",
+    "dGy_dt = c**2 * I * ky * (Byx + Byy + Byz)\n",
+    "dGz_dt = c**2 * I * kz * (Bzx + Bzy + Bzz)\n",
+    "dG_dt = Matrix([[dGx_dt],[dGy_dt],[dGz_dt]])\n",
+    "\n",
+    "# d2E/dt2\n",
+    "d2Exx_dt2 =   c**2 * I * kx * (dFx_dt + dFy_dt + dFz_dt)\n",
+    "d2Exy_dt2 =   c**2 * I * ky * (dBzx_dt + dBzy_dt + dBzz_dt)\n",
+    "d2Exz_dt2 = - c**2 * I * kz * (dByx_dt + dByy_dt + dByz_dt)\n",
+    "d2Eyx_dt2 = - c**2 * I * kx * (dBzx_dt + dBzy_dt + dBzz_dt)\n",
+    "d2Eyy_dt2 =   c**2 * I * ky * (dFx_dt + dFy_dt + dFz_dt)\n",
+    "d2Eyz_dt2 =   c**2 * I * kz * (dBxx_dt + dBxy_dt + dBxz_dt)\n",
+    "d2Ezx_dt2 =   c**2 * I * kx * (dByx_dt + dByy_dt + dByz_dt)\n",
+    "d2Ezy_dt2 = - c**2 * I * ky * (dBxx_dt + dBxy_dt + dBxz_dt)\n",
+    "d2Ezz_dt2 =   c**2 * I * kz * (dFx_dt + dFy_dt + dFz_dt)\n",
+    "d2E_dt2 = Matrix([[d2Exx_dt2],[d2Exy_dt2],[d2Exz_dt2],[d2Eyx_dt2],[d2Eyy_dt2],[d2Eyz_dt2],[d2Ezx_dt2],[d2Ezy_dt2],[d2Ezz_dt2]])\n",
+    "\n",
+    "# d2B/dt2\n",
+    "d2Bxx_dt2 =   I * kx * (dGx_dt  + dGy_dt  + dGz_dt)\n",
+    "d2Bxy_dt2 = - I * ky * (dEzx_dt + dEzy_dt + dEzz_dt)\n",
+    "d2Bxz_dt2 =   I * kz * (dEyx_dt + dEyy_dt + dEyz_dt)\n",
+    "d2Byx_dt2 =   I * kx * (dEzx_dt + dEzy_dt + dEzz_dt)\n",
+    "d2Byy_dt2 =   I * ky * (dGx_dt  + dGy_dt  + dGz_dt)\n",
+    "d2Byz_dt2 = - I * kz * (dExx_dt + dExy_dt + dExz_dt)\n",
+    "d2Bzx_dt2 = - I * kx * (dEyx_dt + dEyy_dt + dEyz_dt)\n",
+    "d2Bzy_dt2 =   I * ky * (dExx_dt + dExy_dt + dExz_dt)\n",
+    "d2Bzz_dt2 =   I * kz * (dGx_dt  + dGy_dt  + dGz_dt)\n",
+    "d2B_dt2 = Matrix([[d2Bxx_dt2],[d2Bxy_dt2],[d2Bxz_dt2],[d2Byx_dt2],[d2Byy_dt2],[d2Byz_dt2],[d2Bzx_dt2],[d2Bzy_dt2],[d2Bzz_dt2]])\n",
+    "\n",
+    "# d2F/dt2\n",
+    "d2Fx_dt2 = I * kx * (dExx_dt + dExy_dt + dExz_dt)\n",
+    "d2Fy_dt2 = I * ky * (dEyx_dt + dEyy_dt + dEyz_dt)\n",
+    "d2Fz_dt2 = I * kz * (dEzx_dt + dEzy_dt + dEzz_dt)\n",
+    "d2F_dt2 = Matrix([[d2Fx_dt2],[d2Fy_dt2],[d2Fz_dt2]])\n",
+    "\n",
+    "# d2G/dt2\n",
+    "d2Gx_dt2 = c**2 * I * kx * (dBxx_dt + dBxy_dt + dBxz_dt)\n",
+    "d2Gy_dt2 = c**2 * I * ky * (dByx_dt + dByy_dt + dByz_dt)\n",
+    "d2Gz_dt2 = c**2 * I * kz * (dBzx_dt + dBzy_dt + dBzz_dt)\n",
+    "d2G_dt2 = Matrix([[d2Gx_dt2],[d2Gy_dt2],[d2Gz_dt2]])\n",
+    "\n",
+    "for i in range(dd):\n",
+    "    d2E_dt2[i] = sp.expand(d2E_dt2[i])\n",
+    "\n",
+    "for i in range(dd):\n",
+    "    d2B_dt2[i] = sp.expand(d2B_dt2[i])\n",
+    "\n",
+    "for i in range(3):\n",
+    "    d2F_dt2[i] = sp.expand(d2F_dt2[i])\n",
+    "    \n",
+    "for i in range(3):\n",
+    "    d2G_dt2[i] = sp.expand(d2G_dt2[i])\n",
+    "    \n",
+    "# Extended array for E and G\n",
+    "EG = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    EG[i] = E[i]\n",
+    "for i in range(dd,DD):\n",
+    "    EG[i] = G[i-dd]\n",
+    "\n",
+    "# dEG/dt\n",
+    "dEG_dt = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    dEG_dt[i] = dE_dt[i]\n",
+    "for i in range(dd,DD):\n",
+    "    dEG_dt[i] = dG_dt[i-dd]\n",
+    "    \n",
+    "# d2EG/dt2\n",
+    "d2EG_dt2 = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    d2EG_dt2[i] = d2E_dt2[i]\n",
+    "for i in range(dd,DD):\n",
+    "    d2EG_dt2[i] = d2G_dt2[i-dd]\n",
+    "    \n",
+    "# Extended array for B and F\n",
+    "BF = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    BF[i] = B[i]\n",
+    "for i in range(dd,DD):\n",
+    "    BF[i] = F[i-dd]\n",
+    "\n",
+    "# dBF/dt\n",
+    "dBF_dt = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    dBF_dt[i] = dB_dt[i]\n",
+    "for i in range(dd,DD):\n",
+    "    dBF_dt[i] = dF_dt[i-dd]\n",
+    "\n",
+    "# d2BF/dt2\n",
+    "d2BF_dt2 = zeros(DD,1)\n",
+    "for i in range(dd):\n",
+    "    d2BF_dt2[i] = d2B_dt2[i]\n",
+    "for i in range(dd,DD):\n",
+    "    d2BF_dt2[i] = d2F_dt2[i-dd]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Solve second-order ODEs for $\\boldsymbol{E}$, $\\boldsymbol{B}$, $F$ and $G$:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "print(r'Solve equations for E and G:')\n",
+    "EG_t, EG_new = evolve(EG, dEG_dt, d2EG_dt2)\n",
+    "\n",
+    "print(r'Solve equations for B and F:')\n",
+    "BF_t, BF_new = evolve(BF, dBF_dt, d2BF_dt2)\n",
+    "\n",
+    "# Check correctness by taking *first* derivative\n",
+    "# and comparing with initial right-hand side at time tn\n",
+    "# E,G\n",
+    "diff = EG_t.diff(t).subs(t, tn).subs(om, c*knorm).expand() - dEG_dt\n",
+    "diff.simplify()\n",
+    "if diff != zeros(DD,1):\n",
+    "    print('Integration in time failed')\n",
+    "    display(diff)\n",
+    "# B,F\n",
+    "diff = BF_t.diff(t).subs(t, tn).subs(om, c*knorm).expand() - dBF_dt\n",
+    "diff.simplify()\n",
+    "if diff != zeros(DD,1):\n",
+    "    print('Integration in time failed')\n",
+    "    display(diff)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Coefficients of PSATD equations in PML:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# Code generation\n",
+    "from sympy.codegen.ast import Assignment\n",
+    "\n",
+    "#       0,   1,   2,   3,   4,   5,   6,   7,   8,  9, 10, 11\n",
+    "# EG: Exx, Exy, Exz, Eyx, Eyy, Eyz, Ezx, Ezy, Ezz, Gx, Gy, Gz\n",
+    "# BF: Bxx, Bxy, Bxz, Byx, Byy, Byz, Bzx, Bzy, Bzz, Fx, Fy, Fz\n",
+    "\n",
+    "# Select update equation (left hand side)\n",
+    "X_new = BF_new[0]\n",
+    "\n",
+    "# Extract individual terms (right hand side)\n",
+    "for i in range(DD):\n",
+    "    X = EG[i]\n",
+    "    C = X_new.coeff(X, 1).simplify()\n",
+    "    print(r'Coefficient multiplying ' + str(X))\n",
+    "    display(C)\n",
+    "    #print(ccode(Assignment(sp.symbols(r'LHS'), C)))\n",
+    "for i in range(DD):\n",
+    "    X = BF[i]\n",
+    "    C = X_new.coeff(X, 1).simplify()\n",
+    "    print(r'Coefficient multiplying ' + str(X))\n",
+    "    display(C)\n",
+    "    #print(ccode(Assignment(sp.symbols(r'LHS'), C)))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From 9dc9d2a8a62e5139485be4d174c34beeeb2eb4d9 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Wed, 4 Oct 2023 14:31:15 -0700
Subject: [PATCH 03/12] CI: Unbreak macOS (libomp) (#4341)

Something changed in the base image again.
---
 .github/workflows/macos.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
index 2fd979feff9..c81ee598114 100644
--- a/.github/workflows/macos.yml
+++ b/.github/workflows/macos.yml
@@ -45,7 +45,7 @@ jobs:
         brew install ccache
         brew install fftw
         brew install libomp
-        brew link --force libomp
+        brew link --overwrite --force libomp
         brew install ninja
         brew install open-mpi
         brew install pkg-config

From 01f90d89973f4c8f777333b50a716d6e5c746628 Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Wed, 4 Oct 2023 14:31:37 -0700
Subject: [PATCH 04/12] Close #4331 segfault when activating nuclearfusion
 collisions (#4338)

* Raise proper error message when specifying product species

* Fix case where 0 particles should be created

* Clang-Tidy: Disable `modernize-return-braced-init-list`

Buggy transform for non-init-list constructors.

---------

Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
---
 .clang-tidy                                                   | 2 +-
 Source/Particles/Collision/BinaryCollision/BinaryCollision.H  | 4 ++++
 .../Collision/BinaryCollision/ParticleCreationFunc.H          | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index 42f4fc36065..f40dd987add 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -33,10 +33,10 @@ Checks: '-*,
     modernize-replace-auto-ptr,
     modernize-replace-disallow-copy-and-assign-macro,
     modernize-replace-random-shuffle,
-    modernize-return-braced-init-list,
     modernize-shrink-to-fit,
     modernize-unary-static-assert,
     modernize-use-nullptr,
+        -modernize-return-braced-init-list,
     mpi-*,
     performance-faster-string-find,
     performance-for-range-copy,
diff --git a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H
index 5473f0093f0..9cf2f91b5aa 100644
--- a/Source/Particles/Collision/BinaryCollision/BinaryCollision.H
+++ b/Source/Particles/Collision/BinaryCollision/BinaryCollision.H
@@ -97,6 +97,10 @@ public:
         const amrex::ParmParse pp_collision_name(collision_name);
         pp_collision_name.queryarr("product_species", m_product_species);
         m_have_product_species = !m_product_species.empty();
+        if ((std::is_same<CopyTransformFunctorType, NoParticleCreationFunc>::value) & (m_have_product_species)) {
+            WARPX_ABORT_WITH_MESSAGE( "Binary collision " + collision_name +
+                " does not produce species. Thus, `product_species` should not be specified in the input script." );
+        }
         m_copy_transform_functor = CopyTransformFunctorType(collision_name, mypc);
     }
 
diff --git a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
index 2b43f76e89f..7fdf04de6ca 100644
--- a/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
+++ b/Source/Particles/Collision/BinaryCollision/ParticleCreationFunc.H
@@ -115,7 +115,7 @@ public:
     {
         using namespace amrex::literals;
 
-        if (n_total_pairs == 0) return {m_num_product_species, 0};
+        if (n_total_pairs == 0) return amrex::Vector<int>(m_num_product_species, 0);
 
         // Compute offset array and allocate memory for the produced species
         amrex::Gpu::DeviceVector<index_type> offsets(n_total_pairs);

From 1ad0dbc9166dd204a2a40c932fa8e366bd0eab67 Mon Sep 17 00:00:00 2001
From: Edoardo Zoni <59625522+EZoni@users.noreply.github.com>
Date: Wed, 4 Oct 2023 18:39:51 -0700
Subject: [PATCH 05/12] Momentum-conserving gather for MR ratio higher than 2
 (#1650)

* Momentum-Conserving Interpolation for Arbitrary Mesh Refinement

* Implement 1D Case, Cleaning

* Add Zero Padding

* Add Langmuir Test (FDTD)

* Fix CI test

* Abort when using PSATD + MR + momentum-conserving

* Apply suggestions from code review

* Improve docstring
---
 ...gmuir_multi_2d_MR_momentum_conserving.json |  40 +
 Regression/WarpX-tests.ini                    |  19 +
 Source/Parallelization/WarpXComm.cpp          |  32 +-
 Source/Parallelization/WarpXComm_K.H          | 681 +++++-------------
 Source/WarpX.cpp                              |  12 +
 5 files changed, 273 insertions(+), 511 deletions(-)
 create mode 100644 Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json

diff --git a/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json
new file mode 100644
index 00000000000..72c11245e82
--- /dev/null
+++ b/Regression/Checksum/benchmarks_json/Langmuir_multi_2d_MR_momentum_conserving.json
@@ -0,0 +1,40 @@
+{
+  "electrons": {
+    "particle_momentum_x": 4.2339125386796835e-20,
+    "particle_momentum_y": 0.0,
+    "particle_momentum_z": 4.2339186493637336e-20,
+    "particle_position_x": 0.6553603030161138,
+    "particle_position_y": 0.6553603026454923,
+    "particle_weight": 3200000000000000.5
+  },
+  "lev=0": {
+    "Bx": 0.0,
+    "By": 44.73189518299763,
+    "Bz": 0.0,
+    "Ex": 7583674122515.894,
+    "Ey": 0.0,
+    "Ez": 7583672042436.007,
+    "jx": 7283798122345162.0,
+    "jy": 0.0,
+    "jz": 7283804678733656.0
+  },
+  "lev=1": {
+    "Bx": 0.0,
+    "By": 467.8271651312273,
+    "Bz": 0.0,
+    "Ex": 7609732451490.526,
+    "Ey": 0.0,
+    "Ez": 7604891944279.171,
+    "jx": 7103859890231833.0,
+    "jy": 0.0,
+    "jz": 7099873215674540.0
+  },
+  "positrons": {
+    "particle_momentum_x": 4.233907500710132e-20,
+    "particle_momentum_y": 0.0,
+    "particle_momentum_z": 4.2339135644675305e-20,
+    "particle_position_x": 0.6553596934061604,
+    "particle_position_y": 0.6553596937869579,
+    "particle_weight": 3200000000000000.5
+  }
+}
\ No newline at end of file
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index e4c502d858c..9905f77f34a 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -1225,6 +1225,25 @@ particleTypes = electrons positrons
 analysisRoutine = Examples/Tests/langmuir/analysis_2d.py
 analysisOutputImage = Langmuir_multi_2d_MR.png
 
+[Langmuir_multi_2d_MR_momentum_conserving]
+buildDir = .
+inputFile = Examples/Tests/langmuir/inputs_2d
+runtime_params = algo.maxwell_solver=ckc warpx.use_filter=1 amr.max_level=1 amr.ref_ratio=4 warpx.fine_tag_lo=-10.e-6 -10.e-6 warpx.fine_tag_hi=10.e-6 10.e-6 algo.field_gathering=momentum-conserving diag1.electrons.variables=w ux uy uz diag1.positrons.variables=w ux uy uz
+dim = 2
+addToCompileString =
+cmakeSetupOpts = -DWarpX_DIMS=2
+restartTest = 0
+useMPI = 1
+numprocs = 2
+useOMP = 1
+numthreads = 1
+compileTest = 0
+doVis = 0
+compareParticles = 1
+particleTypes = electrons positrons
+analysisRoutine = Examples/Tests/langmuir/analysis_2d.py
+analysisOutputImage = Langmuir_multi_2d_MR_momentum_conserving.png
+
 [Langmuir_multi_2d_MR_psatd]
 buildDir = .
 inputFile = Examples/Tests/langmuir/inputs_2d
diff --git a/Source/Parallelization/WarpXComm.cpp b/Source/Parallelization/WarpXComm.cpp
index 8c87cc9c772..e5ede95272c 100644
--- a/Source/Parallelization/WarpXComm.cpp
+++ b/Source/Parallelization/WarpXComm.cpp
@@ -181,6 +181,16 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
                                                             ng_src, ng, WarpX::do_single_precision_comms, cperiod);
             }
 
+            const amrex::IntVect& refinement_ratio = refRatio(lev-1);
+
+            const amrex::IntVect& Bx_fp_stag = Bfield_fp[lev][0]->ixType().toIntVect();
+            const amrex::IntVect& By_fp_stag = Bfield_fp[lev][1]->ixType().toIntVect();
+            const amrex::IntVect& Bz_fp_stag = Bfield_fp[lev][2]->ixType().toIntVect();
+
+            const amrex::IntVect& Bx_cp_stag = Bfield_cp[lev][0]->ixType().toIntVect();
+            const amrex::IntVect& By_cp_stag = Bfield_cp[lev][1]->ixType().toIntVect();
+            const amrex::IntVect& Bz_cp_stag = Bfield_cp[lev][2]->ixType().toIntVect();
+
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
@@ -203,9 +213,9 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
                 amrex::ParallelFor(bx,
                 [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
                 {
-                    warpx_interp_nd_bfield_x(j,k,l, bx_aux, bx_fp, bx_cp, bx_c);
-                    warpx_interp_nd_bfield_y(j,k,l, by_aux, by_fp, by_cp, by_c);
-                    warpx_interp_nd_bfield_z(j,k,l, bz_aux, bz_fp, bz_cp, bz_c);
+                    warpx_interp(j, k, l, bx_aux, bx_fp, bx_cp, bx_c, Bx_fp_stag, Bx_cp_stag, refinement_ratio);
+                    warpx_interp(j, k, l, by_aux, by_fp, by_cp, by_c, By_fp_stag, By_cp_stag, refinement_ratio);
+                    warpx_interp(j, k, l, bz_aux, bz_fp, bz_cp, bz_c, Bz_fp_stag, Bz_cp_stag, refinement_ratio);
                 });
             }
         }
@@ -239,6 +249,16 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
                                                             ng_src, ng, WarpX::do_single_precision_comms, cperiod);
             }
 
+            const amrex::IntVect& refinement_ratio = refRatio(lev-1);
+
+            const amrex::IntVect& Ex_fp_stag = Efield_fp[lev][0]->ixType().toIntVect();
+            const amrex::IntVect& Ey_fp_stag = Efield_fp[lev][1]->ixType().toIntVect();
+            const amrex::IntVect& Ez_fp_stag = Efield_fp[lev][2]->ixType().toIntVect();
+
+            const amrex::IntVect& Ex_cp_stag = Efield_cp[lev][0]->ixType().toIntVect();
+            const amrex::IntVect& Ey_cp_stag = Efield_cp[lev][1]->ixType().toIntVect();
+            const amrex::IntVect& Ez_cp_stag = Efield_cp[lev][2]->ixType().toIntVect();
+
 #ifdef AMREX_USE_OMP
 #pragma omp parallel if (Gpu::notInLaunchRegion())
 #endif
@@ -261,9 +281,9 @@ WarpX::UpdateAuxilaryDataStagToNodal ()
                 amrex::ParallelFor(bx,
                 [=] AMREX_GPU_DEVICE (int j, int k, int l) noexcept
                 {
-                    warpx_interp_nd_efield_x(j,k,l, ex_aux, ex_fp, ex_cp, ex_c);
-                    warpx_interp_nd_efield_y(j,k,l, ey_aux, ey_fp, ey_cp, ey_c);
-                    warpx_interp_nd_efield_z(j,k,l, ez_aux, ez_fp, ez_cp, ez_c);
+                    warpx_interp(j, k, l, ex_aux, ex_fp, ex_cp, ex_c, Ex_fp_stag, Ex_cp_stag, refinement_ratio);
+                    warpx_interp(j, k, l, ey_aux, ey_fp, ey_cp, ey_c, Ey_fp_stag, Ey_cp_stag, refinement_ratio);
+                    warpx_interp(j, k, l, ez_aux, ez_fp, ez_cp, ez_c, Ez_fp_stag, Ez_cp_stag, refinement_ratio);
                 });
             }
         }
diff --git a/Source/Parallelization/WarpXComm_K.H b/Source/Parallelization/WarpXComm_K.H
index b97773af251..4cec33bc84f 100644
--- a/Source/Parallelization/WarpXComm_K.H
+++ b/Source/Parallelization/WarpXComm_K.H
@@ -10,6 +10,20 @@
 #include <AMReX.H>
 #include <AMReX_FArrayBox.H>
 
+/**
+ * \brief Interpolation function called within WarpX::UpdateAuxilaryDataSameType
+ * to interpolate data from the coarse and fine grids to the fine aux grid,
+ * assuming that all grids have the same staggering (either collocated or staggered).
+ *
+ * \param[in] j index along x of the output array
+ * \param[in] k index along y (in 3D) or z (in 2D) of the output array
+ * \param[in] l index along z (in 3D, l=0 in 2D) of the output array
+ * \param[in,out] arr_aux output array where interpolated values are stored
+ * \param[in] arr_fine input fine-patch array storing the values to interpolate
+ * \param[in] arr_coarse input coarse-patch array storing the values to interpolate
+ * \param[in] arr_stag IndexType of the arrays
+ * \param[in] rr mesh refinement ratios along each direction
+ */
 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
 void warpx_interp (int j, int k, int l,
                    amrex::Array4<amrex::Real      > const& arr_aux,
@@ -68,561 +82,218 @@ void warpx_interp (int j, int k, int l,
     arr_aux(j,k,l) = arr_fine(j,k,l) + res;
 }
 
+/**
+ * \brief Interpolation function called within WarpX::UpdateAuxilaryDataStagToNodal
+ * to interpolate data from the coarse and fine grids to the fine aux grid,
+ * with momentum-conserving field gathering, hence between grids with different staggering,
+ * and assuming that the aux grid is collocated.
+ *
+ * \param[in] j index along x of the output array
+ * \param[in] k index along y (in 3D) or z (in 2D) of the output array
+ * \param[in] l index along z (in 3D, l=0 in 2D) of the output array
+ * \param[in,out] arr_aux output array where interpolated values are stored
+ * \param[in] arr_fine input fine-patch array storing the values to interpolate
+ * \param[in] arr_coarse input coarse-patch array storing the values to interpolate
+ * \param[in] arr_fine_stag IndexType of the fine-patch arrays
+ * \param[in] arr_coarse_stag IndexType of the coarse-patch arrays
+ * \param[in] rr mesh refinement ratios along each direction
+ */
 AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bxa,
-                               amrex::Array4<amrex::Real const> const& Bxf,
-                               amrex::Array4<amrex::Real const> const& Bxc,
-                               amrex::Array4<amrex::Real const> const& Bxg)
+void warpx_interp (int j, int k, int l,
+                   amrex::Array4<amrex::Real      > const& arr_aux,
+                   amrex::Array4<amrex::Real const> const& arr_fine,
+                   amrex::Array4<amrex::Real const> const& arr_coarse,
+                   amrex::Array4<amrex::Real const> const& arr_tmp,
+                   const amrex::IntVect& arr_fine_stag,
+                   const amrex::IntVect& arr_coarse_stag,
+                   const amrex::IntVect& rr)
 {
     using namespace amrex;
 
-    // Pad Bxf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Bxf_zeropad = [Bxf] (const int jj, const int kk, const int ll) noexcept
+    // Pad input arrays with zeros beyond ghost cells
+    // for out-of-bound accesses due to large-stencil operations
+    const auto arr_fine_zeropad = [arr_fine] (const int jj, const int kk, const int ll) noexcept
     {
-        return Bxf.contains(jj,kk,ll) ? Bxf(jj,kk,ll) : 0.0_rt;
+        return arr_fine.contains(jj,kk,ll) ? arr_fine(jj,kk,ll) : 0.0_rt;
+    };
+    const auto arr_coarse_zeropad = [arr_coarse] (const int jj, const int kk, const int ll) noexcept
+    {
+        return arr_coarse.contains(jj,kk,ll) ? arr_coarse(jj,kk,ll) : 0.0_rt;
+    };
+    const auto arr_tmp_zeropad = [arr_tmp] (const int jj, const int kk, const int ll) noexcept
+    {
+        return arr_tmp.contains(jj,kk,ll) ? arr_tmp(jj,kk,ll) : 0.0_rt;
     };
 
-    const int jg = amrex::coarsen(j,2);
-    const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    const Real owx = 1.0_rt-wx;
-
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy = 0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
+    // NOTE Indices (j,k,l) in the following refer to:
+    // - (z,-,-) in 1D
+    // - (x,z,-) in 2D
+    // - (r,z,-) in RZ
+    // - (x,y,z) in 3D
 
+    // Refinement ratio
+    const int rj = rr[0];
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * Bxg(jg  ,0,0)
-        +      wx * Bxg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real bc = owx * Bxc(jg  ,0,0)
-        +      wx * Bxc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real bf = 0.5_rt*(Bxf_zeropad(j-1,0,0) + Bxf_zeropad(j,0,0));
-    amrex::ignore_unused(owy);
-
+    constexpr int rk = 1;
+    constexpr int rl = 1;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * Bxg(jg  ,kg  ,0)
-        +     owx *  wy * Bxg(jg  ,kg+1,0)
-        +      wx * owy * Bxg(jg+1,kg  ,0)
-        +      wx *  wy * Bxg(jg+1,kg+1,0);
-
-    // interp from coarse staggered to fine nodal
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    const Real bc = owx * owy * Bxc(jg  ,kg  ,0)
-        +     owx *  wy * Bxc(jg  ,kg-1,0)
-        +      wx * owy * Bxc(jg+1,kg  ,0)
-        +      wx *  wy * Bxc(jg+1,kg-1,0);
-
-    // interp from fine staggered to fine nodal
-    const Real bf = 0.5_rt*(Bxf_zeropad(j,k-1,0) + Bxf_zeropad(j,k,0));
-
+    const     int rk = rr[1];
+    constexpr int rl = 1;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt;
-    Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * owz * Bxg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Bxg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Bxg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Bxg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Bxg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Bxg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Bxg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Bxg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    wz = 0.5_rt-wz;  owz = 1.0_rt-wz;
-    const Real bc = owx * owy * owz * Bxc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Bxc(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Bxc(jg  ,kg-1,lg  )
-        +      wx *  wy * owz * Bxc(jg+1,kg-1,lg  )
-        +     owx * owy *  wz * Bxc(jg  ,kg  ,lg-1)
-        +      wx * owy *  wz * Bxc(jg+1,kg  ,lg-1)
-        +     owx *  wy *  wz * Bxc(jg  ,kg-1,lg-1)
-        +      wx *  wy *  wz * Bxc(jg+1,kg-1,lg-1);
-
-    // interp from fine stagged to fine nodal
-    const Real bf = 0.25_rt*(Bxf_zeropad(j,k-1,l-1) + Bxf_zeropad(j,k,l-1) + Bxf_zeropad(j,k-1,l) + Bxf_zeropad(j,k,l));
+    const int rk = rr[1];
+    const int rl = rr[2];
 #endif
 
-    Bxa(j,k,l) = bg + (bf-bc);
-}
-
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bya,
-                               amrex::Array4<amrex::Real const> const& Byf,
-                               amrex::Array4<amrex::Real const> const& Byc,
-                               amrex::Array4<amrex::Real const> const& Byg)
-{
-    using namespace amrex;
-
-    // Pad Byf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Byf_zeropad = [Byf] (const int jj, const int kk, const int ll) noexcept
-    {
-        return Byf.contains(jj,kk,ll) ? Byf(jj,kk,ll) : 0.0_rt;
-    };
-
-    const int jg = amrex::coarsen(j,2);
-    Real wx, owx;
-    wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    owx = 1.0_rt-wx;
-
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy = 0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
+    // Staggering of fine array (0: cell-centered; 1: nodal)
+    const int sj_fp = arr_fine_stag[0];
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+    const int sk_fp = arr_fine_stag[1];
+#elif defined(WARPX_DIM_3D)
+    const int sk_fp = arr_fine_stag[1];
+    const int sl_fp = arr_fine_stag[2];
+#endif
 
+    // Staggering of coarse array (0: cell-centered; 1: nodal)
+    const int sj_cp = arr_coarse_stag[0];
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * Byg(jg  ,0,0)
-        +      wx * Byg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real bc = owx * Byc(jg  ,0,0)
-        +      wx * Byc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real bf = 0.5_rt*(Byf_zeropad(j-1,0,0) + Byf_zeropad(j,0,0));
-    amrex::ignore_unused(owy);
-
+    constexpr int sk_cp = 0;
+    constexpr int sl_cp = 0;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * Byg(jg  ,kg  ,0)
-        +     owx *  wy * Byg(jg  ,kg+1,0)
-        +      wx * owy * Byg(jg+1,kg  ,0)
-        +      wx *  wy * Byg(jg+1,kg+1,0);
-
-    // interp from coarse stagged (cell-centered for By) to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    const Real bc = owx * owy * Byc(jg  ,kg  ,0)
-        +     owx *  wy * Byc(jg  ,kg-1,0)
-        +      wx * owy * Byc(jg-1,kg  ,0)
-        +      wx *  wy * Byc(jg-1,kg-1,0);
-
-    // interp form fine stagger (cell-centered for By) to fine nodal
-    const Real bf = 0.25_rt*(Byf_zeropad(j,k,0) + Byf_zeropad(j-1,k,0) + Byf_zeropad(j,k-1,0) + Byf_zeropad(j-1,k-1,0));
-
+    const     int sk_cp = arr_coarse_stag[1];
+    constexpr int sl_cp = 0;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt;
-    Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * owz * Byg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Byg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Byg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Byg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Byg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Byg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Byg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Byg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    wz = 0.5_rt-wz;  owz = 1.0_rt-wz;
-    const Real bc = owx * owy * owz * Byc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Byc(jg-1,kg  ,lg  )
-        +     owx *  wy * owz * Byc(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Byc(jg-1,kg+1,lg  )
-        +     owx * owy *  wz * Byc(jg  ,kg  ,lg-1)
-        +      wx * owy *  wz * Byc(jg-1,kg  ,lg-1)
-        +     owx *  wy *  wz * Byc(jg  ,kg+1,lg-1)
-        +      wx *  wy *  wz * Byc(jg-1,kg+1,lg-1);
-
-    // interp from fine stagged to fine nodal
-    const Real bf = 0.25_rt*(Byf_zeropad(j-1,k,l-1) + Byf_zeropad(j,k,l-1) + Byf_zeropad(j-1,k,l) + Byf_zeropad(j,k,l));
-
+    const int sk_cp = arr_coarse_stag[1];
+    const int sl_cp = arr_coarse_stag[2];
 #endif
 
-    Bya(j,k,l) = bg + (bf-bc);
-}
+    // Number of points used for interpolation from coarse grid to fine grid
+    int nj;
+    int nk;
+    int nl;
 
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_bfield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Bza,
-                               amrex::Array4<amrex::Real const> const& Bzf,
-                               amrex::Array4<amrex::Real const> const& Bzc,
-                               amrex::Array4<amrex::Real const> const& Bzg)
-{
-    using namespace amrex;
+    int jc = amrex::coarsen(j, rj);
+    int kc = amrex::coarsen(k, rk);
+    int lc = amrex::coarsen(l, rl);
 
-    // Pad Bzf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Bzf_zeropad = [Bzf] (const int jj, const int kk, const int ll) noexcept
-    {
-        return Bzf.contains(jj,kk,ll) ? Bzf(jj,kk,ll) : 0.0_rt;
-    };
+    amrex::Real tmp = 0.0_rt;
+    amrex::Real fine = 0.0_rt;
+    amrex::Real coarse = 0.0_rt;
 
-    const int jg = amrex::coarsen(j,2);
-    Real wx, owx;
-    wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    owx = 1.0_rt-wx;
+    amrex::Real wj;
+    amrex::Real wk;
+    amrex::Real wl;
 
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy = 0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
+    // 1) Interpolation from coarse nodal to fine nodal
 
+    nj = 2;
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * Bzg(jg  ,0,0)
-        +      wx * Bzg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real bc = owx * Bzc(jg  ,0,0)
-        +      wx * Bzc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real bf =  Bzf_zeropad(j,0,0);
-    amrex::ignore_unused(owy);
-
+    nk = 1;
+    nl = 1;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * Bzg(jg  ,kg  ,0)
-        +     owx *  wy * Bzg(jg  ,kg+1,0)
-        +      wx * owy * Bzg(jg+1,kg  ,0)
-        +      wx *  wy * Bzg(jg+1,kg+1,0);
-
-    // interp from coarse staggered to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    const Real bc = owx * owy * Bzc(jg  ,kg  ,0)
-        +     owx *  wy * Bzc(jg  ,kg+1,0)
-        +      wx * owy * Bzc(jg-1,kg  ,0)
-        +      wx *  wy * Bzc(jg-1,kg+1,0);
-
-    // interp from fine staggered to fine nodal
-    const Real bf = 0.5_rt*(Bzf_zeropad(j-1,k,0) + Bzf_zeropad(j,k,0));
-
+    nk = 2;
+    nl = 1;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    const Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt;
-    const Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real bg = owx * owy * owz * Bzg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Bzg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Bzg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Bzg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Bzg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Bzg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Bzg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Bzg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    const Real bc = owx * owy * owz * Bzc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Bzc(jg-1,kg  ,lg  )
-        +     owx *  wy * owz * Bzc(jg  ,kg-1,lg  )
-        +      wx *  wy * owz * Bzc(jg-1,kg-1,lg  )
-        +     owx * owy *  wz * Bzc(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Bzc(jg-1,kg  ,lg+1)
-        +     owx *  wy *  wz * Bzc(jg  ,kg-1,lg+1)
-        +      wx *  wy *  wz * Bzc(jg-1,kg-1,lg+1);
-
-    // interp from fine stagged to fine nodal
-    const Real bf = 0.25_rt*(Bzf_zeropad(j-1,k-1,l) + Bzf_zeropad(j,k-1,l) + Bzf_zeropad(j-1,k,l) + Bzf_zeropad(j,k,l));
-
+    nk = 2;
+    nl = 2;
 #endif
 
-    Bza(j,k,l) = bg + (bf-bc);
-}
-
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_x (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Exa,
-                               amrex::Array4<amrex::Real const> const& Exf,
-                               amrex::Array4<amrex::Real const> const& Exc,
-                               amrex::Array4<amrex::Real const> const& Exg)
-{
-    using namespace amrex;
-
-    // Pad Exf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Exf_zeropad = [Exf] (const int jj, const int kk, const int ll) noexcept
-    {
-        return Exf.contains(jj,kk,ll) ? Exf(jj,kk,ll) : 0.0_rt;
-    };
-
-    const int jg = amrex::coarsen(j,2);
-    Real wx, owx;
-    wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    owx = 1.0_rt-wx;
+    wj = 1.0_rt;
+    wk = 1.0_rt;
+    wl = 1.0_rt;
+    for         (int jj = 0; jj < nj; jj++) {
+        for     (int kk = 0; kk < nk; kk++) {
+            for (int ll = 0; ll < nl; ll++) {
+                wj = (rj - amrex::Math::abs(j - (jc + jj) * rj)) / static_cast<amrex::Real>(rj);
+#if (AMREX_SPACEDIM >= 2)
+                wk = (rk - amrex::Math::abs(k - (kc + kk) * rk)) / static_cast<amrex::Real>(rk);
+#endif
+#if (AMREX_SPACEDIM == 3)
+                wl = (rl - amrex::Math::abs(l - (lc + ll) * rl)) / static_cast<amrex::Real>(rl);
+#endif
+                tmp += wj * wk * wl * arr_tmp_zeropad(jc+jj,kc+kk,lc+ll);
+            }
+        }
+    }
 
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy =0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
+    // 2) Interpolation from coarse staggered to fine nodal
 
+    nj = 2;
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * Exg(jg  ,0,0)
-        +      wx * Exg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real ec = owx * Exc(jg  ,0,0)
-        +      wx * Exc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = Exf_zeropad(j,0,0);
-    amrex::ignore_unused(owy);
-
+    nk = 1;
+    nl = 1;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * Exg(jg  ,kg  ,0)
-        +     owx *  wy * Exg(jg  ,kg+1,0)
-        +      wx * owy * Exg(jg+1,kg  ,0)
-        +      wx *  wy * Exg(jg+1,kg+1,0);
-
-    // interp from coarse staggered to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    const Real ec = owx * owy * Exc(jg  ,kg  ,0)
-        +     owx *  wy * Exc(jg  ,kg+1,0)
-        +      wx * owy * Exc(jg-1,kg  ,0)
-        +      wx *  wy * Exc(jg-1,kg+1,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Exf_zeropad(j-1,k,0) + Exf_zeropad(j,k,0));
-
+    nk = 2;
+    nl = 1;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    const Real wz = (l == lg*2) ? 0.0 : 0.5;
-    const Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * owz * Exg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Exg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Exg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Exg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Exg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Exg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Exg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Exg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wx = 0.5_rt-wx;  owx = 1.0_rt-wx;
-    const Real ec = owx * owy * owz * Exc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Exc(jg-1,kg  ,lg  )
-        +     owx *  wy * owz * Exc(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Exc(jg-1,kg+1,lg  )
-        +     owx * owy *  wz * Exc(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Exc(jg-1,kg  ,lg+1)
-        +     owx *  wy *  wz * Exc(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Exc(jg-1,kg+1,lg+1);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Exf_zeropad(j-1,k,l) + Exf_zeropad(j,k,l));
-
+    nk = 2;
+    nl = 2;
 #endif
 
-    Exa(j,k,l) = eg + (ef-ec);
-}
-
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_y (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eya,
-                               amrex::Array4<amrex::Real const> const& Eyf,
-                               amrex::Array4<amrex::Real const> const& Eyc,
-                               amrex::Array4<amrex::Real const> const& Eyg)
-{
-    using namespace amrex;
+    const int jn = (sj_cp == 1) ? j : j - rj / 2;
+    const int kn = (sk_cp == 1) ? k : k - rk / 2;
+    const int ln = (sl_cp == 1) ? l : l - rl / 2;
 
-    // Pad Eyf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Eyf_zeropad = [Eyf] (const int jj, const int kk, const int ll) noexcept
-    {
-        return Eyf.contains(jj,kk,ll) ? Eyf(jj,kk,ll) : 0.0_rt;
-    };
+    jc = amrex::coarsen(jn, rj);
+    kc = amrex::coarsen(kn, rk);
+    lc = amrex::coarsen(ln, rl);
 
-    const int jg = amrex::coarsen(j,2);
-    const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    const Real owx = 1.0_rt-wx;
+    wj = 1.0_rt;
+    wk = 1.0_rt;
+    wl = 1.0_rt;
+    for         (int jj = 0; jj < nj; jj++) {
+        for     (int kk = 0; kk < nk; kk++) {
+            for (int ll = 0; ll < nl; ll++) {
+                wj = (rj - amrex::Math::abs(jn - (jc + jj) * rj)) / static_cast<amrex::Real>(rj);
+#if (AMREX_SPACEDIM >= 2)
+                wk = (rk - amrex::Math::abs(kn - (kc + kk) * rk)) / static_cast<amrex::Real>(rk);
+#endif
+#if (AMREX_SPACEDIM == 3)
+                wl = (rl - amrex::Math::abs(ln - (lc + ll) * rl)) / static_cast<amrex::Real>(rl);
+#endif
+                coarse += wj * wk * wl * arr_coarse_zeropad(jc+jj,kc+kk,lc+ll);
+            }
+        }
+    }
 
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy = 0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
+    // 3) Interpolation from fine staggered to fine nodal
 
+    nj = (sj_fp == 0) ? 2 : 1;
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * Eyg(jg  ,0,0)
-        +      wx * Eyg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real ec = owx * Eyc(jg  ,0,0)
-        +      wx * Eyc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = Eyf_zeropad(j,0,0);
-    amrex::ignore_unused(owy);
-
+    nk = 1;
+    nl = 1;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * Eyg(jg  ,kg  ,0)
-        +     owx *  wy * Eyg(jg  ,kg+1,0)
-        +      wx * owy * Eyg(jg+1,kg  ,0)
-        +      wx *  wy * Eyg(jg+1,kg+1,0);
-
-    // interp from coarse staggered to fine nodal (Eyc is actually nodal)
-   const Real ec = owx * owy * Eyc(jg  ,kg  ,0)
-        +     owx *  wy * Eyc(jg  ,kg+1,0)
-        +      wx * owy * Eyc(jg+1,kg  ,0)
-        +      wx *  wy * Eyc(jg+1,kg+1,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = Eyf_zeropad(j,k,0);
-
+    nk = (sk_fp == 0) ? 2 : 1;
+    nl = 1;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    const Real wz = (l == lg*2) ? 0.0 : 0.5;
-    const Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * owz * Eyg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Eyg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Eyg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Eyg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Eyg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Eyg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Eyg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Eyg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    const Real ec = owx * owy * owz * Eyc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Eyc(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Eyc(jg  ,kg-1,lg  )
-        +      wx *  wy * owz * Eyc(jg+1,kg-1,lg  )
-        +     owx * owy *  wz * Eyc(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Eyc(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Eyc(jg  ,kg-1,lg+1)
-        +      wx *  wy *  wz * Eyc(jg+1,kg-1,lg+1);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Eyf_zeropad(j,k-1,l) + Eyf_zeropad(j,k,l));
-
+    nk = (sk_fp == 0) ? 2 : 1;
+    nl = (sl_fp == 0) ? 2 : 1;
 #endif
 
-    Eya(j,k,l) = eg + (ef-ec);
-}
-
-AMREX_GPU_DEVICE AMREX_FORCE_INLINE
-void warpx_interp_nd_efield_z (int j, int k, int l,
-                               amrex::Array4<amrex::Real> const& Eza,
-                               amrex::Array4<amrex::Real const> const& Ezf,
-                               amrex::Array4<amrex::Real const> const& Ezc,
-                               amrex::Array4<amrex::Real const> const& Ezg)
-{
-    using namespace amrex;
-
-    // Pad Ezf with zeros beyond ghost cells for out-of-bound accesses
-    const auto Ezf_zeropad = [Ezf] (const int jj, const int kk, const int ll) noexcept
-    {
-        return Ezf.contains(jj,kk,ll) ? Ezf(jj,kk,ll) : 0.0_rt;
-    };
-
-    const int jg = amrex::coarsen(j,2);
-    const Real wx = (j == jg*2) ? 0.0_rt : 0.5_rt;
-    const Real owx = 1.0_rt-wx;
-
-    const int kg = amrex::coarsen(k,2);
-    Real wy = 0.0_rt;
-    Real owy = 0.0_rt;
-    wy = (k == kg*2) ? 0.0_rt : 0.5_rt;
-    owy = 1.0_rt-wy;
-
+    const int jm = (sj_fp == 0) ? j-1 : j;
 #if defined(WARPX_DIM_1D_Z)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * Ezg(jg  ,0,0)
-        +      wx * Ezg(jg+1,0,0);
-
-    // interp from coarse staggered to fine nodal
-    const Real ec = owx * Ezc(jg  ,0,0)
-        +      wx * Ezc(jg+1,0,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Ezf_zeropad(j-1,0,0) + Ezf_zeropad(j,0,0));
-    amrex::ignore_unused(owy);
-
+    const int km = k;
+    const int lm = l;
 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * Ezg(jg  ,kg  ,0)
-        +     owx *  wy * Ezg(jg  ,kg+1,0)
-        +      wx * owy * Ezg(jg+1,kg  ,0)
-        +      wx *  wy * Ezg(jg+1,kg+1,0);
-
-    // interp from coarse stagged to fine nodal
-    wy = 0.5_rt-wy;  owy = 1.0_rt-wy;
-    const Real ec = owx * owy * Ezc(jg  ,kg  ,0)
-        +     owx *  wy * Ezc(jg  ,kg-1,0)
-        +      wx * owy * Ezc(jg+1,kg  ,0)
-        +      wx *  wy * Ezc(jg+1,kg-1,0);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Ezf_zeropad(j,k-1,0) + Ezf_zeropad(j,k,0));
-
+    const int km = (sk_fp == 0) ? k-1 : k;
+    const int lm = l;
 #else
-
-    const int lg = amrex::coarsen(l,2);
-    Real wz = (l == lg*2) ? 0.0_rt : 0.5_rt;
-    Real owz = 1.0_rt-wz;
-
-    // interp from coarse nodal to fine nodal
-    const Real eg = owx * owy * owz * Ezg(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Ezg(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Ezg(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Ezg(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Ezg(jg  ,kg  ,lg+1)
-        +      wx * owy *  wz * Ezg(jg+1,kg  ,lg+1)
-        +     owx *  wy *  wz * Ezg(jg  ,kg+1,lg+1)
-        +      wx *  wy *  wz * Ezg(jg+1,kg+1,lg+1);
-
-    // interp from coarse staggered to fine nodal
-    wz = 0.5_rt-wz;  owz = 1.0_rt-wz;
-    const Real ec = owx * owy * owz * Ezc(jg  ,kg  ,lg  )
-        +      wx * owy * owz * Ezc(jg+1,kg  ,lg  )
-        +     owx *  wy * owz * Ezc(jg  ,kg+1,lg  )
-        +      wx *  wy * owz * Ezc(jg+1,kg+1,lg  )
-        +     owx * owy *  wz * Ezc(jg  ,kg  ,lg-1)
-        +      wx * owy *  wz * Ezc(jg+1,kg  ,lg-1)
-        +     owx *  wy *  wz * Ezc(jg  ,kg+1,lg-1)
-        +      wx *  wy *  wz * Ezc(jg+1,kg+1,lg-1);
-
-    // interp from fine staggered to fine nodal
-    const Real ef = 0.5_rt*(Ezf_zeropad(j,k,l-1) + Ezf_zeropad(j,k,l));
-
+    const int km = (sk_fp == 0) ? k-1 : k;
+    const int lm = (sl_fp == 0) ? l-1 : l;
 #endif
 
-    Eza(j,k,l) = eg + (ef-ec);
+    for         (int jj = 0; jj < nj; jj++) {
+        for     (int kk = 0; kk < nk; kk++) {
+            for (int ll = 0; ll < nl; ll++) {
+                wj = 1.0_rt / static_cast<amrex::Real>(nj);
+                wk = 1.0_rt / static_cast<amrex::Real>(nk);
+                wl = 1.0_rt / static_cast<amrex::Real>(nl);
+                fine += wj * wk * wl * arr_fine_zeropad(jm+jj,km+kk,lm+ll);
+            }
+        }
+    }
+
+    // Final result
+    arr_aux(j,k,l) = tmp + (fine - coarse);
 }
 
 /**
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index 348b3125f57..6be8d517775 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -1198,6 +1198,18 @@ WarpX::ReadParameters ()
         // Use same shape factors in all directions, for gathering
         if (field_gathering_algo == GatheringAlgo::MomentumConserving) galerkin_interpolation = false;
 
+        // With the PSATD solver, momentum-conserving field gathering
+        // combined with mesh refinement does not seem to work correctly
+        // TODO Needs debugging
+        if (electromagnetic_solver_id == ElectromagneticSolverAlgo::PSATD &&
+            field_gathering_algo == GatheringAlgo::MomentumConserving &&
+            maxLevel() > 0)
+        {
+            WARPX_ABORT_WITH_MESSAGE(
+                "With the PSATD solver, momentum-conserving field gathering"
+                " combined with mesh refinement is currently not implemented");
+        }
+
         em_solver_medium = GetAlgorithmInteger(pp_algo, "em_solver_medium");
         if (em_solver_medium == MediumForEM::Macroscopic ) {
             macroscopic_solver_algo = GetAlgorithmInteger(pp_algo,"macroscopic_sigma_method");

From cb094d1915fe5d6aaff38d9d60bdb645032b897e Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Wed, 4 Oct 2023 19:49:31 -0700
Subject: [PATCH 06/12] Badge: Minor RTD Link Cleanup

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ef0551405a0..87cf2b012c0 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![Code Status development](https://dev.azure.com/ECP-WarpX/WarpX/_apis/build/status/ECP-WarpX.WarpX?branchName=development)](https://dev.azure.com/ECP-WarpX/WarpX/_build/latest?definitionId=1&branchName=development)
 [![Nightly Installation Tests](https://dev.azure.com/ECP-WarpX/WarpX/_apis/build/status/ECP-WarpX.Nightly?branchName=nightly&label=nightly%20packages)](https://dev.azure.com/ECP-WarpX/WarpX/_build?definitionId=2)
-[![Documentation Status](https://readthedocs.org/projects/warpx/badge/?version=latest)](https://warpx.readthedocs.io/en/latest/?badge=latest)
+[![Documentation Status](https://readthedocs.org/projects/warpx/badge/?version=latest)](https://warpx.readthedocs.io)
 [![Spack Version](https://img.shields.io/spack/v/warpx)](https://spack.readthedocs.io/en/latest/package_list.html#warpx)
 [![Conda Version](https://img.shields.io/conda/vn/conda-forge/warpx)](https://anaconda.org/conda-forge/warpx)
 [![Discussions](https://img.shields.io/badge/chat-discussions-turquoise.svg)](https://github.com/ECP-WarpX/WarpX/discussions)  

From 7905cb99004685ff71e6fb012e09422b322f0a95 Mon Sep 17 00:00:00 2001
From: Luca Fedeli <luca.fedeli@cea.fr>
Date: Thu, 5 Oct 2023 06:51:08 +0200
Subject: [PATCH 07/12] ensure that mass is specified (#4337)

---
 Source/Utils/SpeciesUtils.cpp | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/Source/Utils/SpeciesUtils.cpp b/Source/Utils/SpeciesUtils.cpp
index 2ac5879f9d1..bdba97f10b2 100644
--- a/Source/Utils/SpeciesUtils.cpp
+++ b/Source/Utils/SpeciesUtils.cpp
@@ -42,14 +42,6 @@ namespace SpeciesUtils {
         const bool mass_is_specified =
             utils::parser::queryWithParser(pp_species_name, "mass", mass);
 
-        if ( charge_is_specified && species_is_specified ){
-            ablastr::warn_manager::WMRecordWarning("Species",
-                "Both '" + species_name +  ".charge' and " +
-                    species_name + ".species_type' are specified.\n" +
-                    species_name + ".charge' will take precedence.\n");
-
-        }
-
         WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
             charge_is_specified ||
             species_is_specified ||
@@ -58,6 +50,22 @@ namespace SpeciesUtils {
             species_name + "'."
         );
 
+        WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+            mass_is_specified ||
+            species_is_specified ||
+            (injection_style == "external_file"),
+            "Need to specify at least one of species_type or mass for species '" +
+            species_name + "'."
+        );
+
+        if ( charge_is_specified && species_is_specified ){
+            ablastr::warn_manager::WMRecordWarning("Species",
+                "Both '" + species_name +  ".charge' and " +
+                    species_name + ".species_type' are specified.\n" +
+                    species_name + ".charge' will take precedence.\n");
+
+        }
+
         if ( mass_is_specified && species_is_specified ){
             ablastr::warn_manager::WMRecordWarning("Species",
                 "Both '" + species_name +  ".mass' and " +

From 12414e432944fdd7024ce19455e0cec2060687f1 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 5 Oct 2023 07:41:23 -0700
Subject: [PATCH 08/12] Release 23.10 (#4344)

* AMReX: 23.10

* pyAMReX: 23.10

* WarpX: 23.10
---
 .github/workflows/cuda.yml       | 2 +-
 CMakeLists.txt                   | 2 +-
 Docs/source/conf.py              | 4 ++--
 Python/setup.py                  | 2 +-
 Regression/WarpX-GPU-tests.ini   | 2 +-
 Regression/WarpX-tests.ini       | 2 +-
 cmake/dependencies/AMReX.cmake   | 4 ++--
 cmake/dependencies/pyAMReX.cmake | 4 ++--
 run_test.sh                      | 2 +-
 setup.py                         | 2 +-
 10 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index 03a3044848f..1754549ab2d 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -111,7 +111,7 @@ jobs:
         which nvcc || echo "nvcc not in PATH!"
 
         git clone https://github.com/AMReX-Codes/amrex.git ../amrex
-        cd ../amrex && git checkout --detach 2e99628138df3b5b0ecf50b0c1201d5547f821a0 && cd -
+        cd ../amrex && git checkout --detach 23.10 && cd -
         make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_PSATD=TRUE USE_CCACHE=TRUE -j 2
 
   build_nvhpc21-11-nvcc:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f9b1a8bbc2b..5d00093e893 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 # Preamble ####################################################################
 #
 cmake_minimum_required(VERSION 3.20.0)
-project(WarpX VERSION 23.09)
+project(WarpX VERSION 23.10)
 
 include(${WarpX_SOURCE_DIR}/cmake/WarpXFunctions.cmake)
 
diff --git a/Docs/source/conf.py b/Docs/source/conf.py
index e2cdfe15cda..f9e06bcc8c3 100644
--- a/Docs/source/conf.py
+++ b/Docs/source/conf.py
@@ -79,9 +79,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'23.09'
+version = u'23.10'
 # The full version, including alpha/beta/rc tags.
-release = u'23.09'
+release = u'23.10'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/Python/setup.py b/Python/setup.py
index 99f79cd1d18..568c3f83ddc 100644
--- a/Python/setup.py
+++ b/Python/setup.py
@@ -54,7 +54,7 @@
     package_data = {}
 
 setup(name = 'pywarpx',
-      version = '23.09',
+      version = '23.10',
       packages = ['pywarpx'],
       package_dir = {'pywarpx': 'pywarpx'},
       description = """Wrapper of WarpX""",
diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini
index c6caa6b915c..1a1c1fc612c 100644
--- a/Regression/WarpX-GPU-tests.ini
+++ b/Regression/WarpX-GPU-tests.ini
@@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more
 
 [AMReX]
 dir = /home/regtester/git/amrex/
-branch = 2e99628138df3b5b0ecf50b0c1201d5547f821a0
+branch = 23.10
 
 [source]
 dir = /home/regtester/git/WarpX
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index 9905f77f34a..3eb8101addb 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det
 
 [AMReX]
 dir = /home/regtester/AMReX_RegTesting/amrex/
-branch = 2e99628138df3b5b0ecf50b0c1201d5547f821a0
+branch = 23.10
 
 [source]
 dir = /home/regtester/AMReX_RegTesting/warpx
diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake
index 3e0f044f020..a4d1f0fed5c 100644
--- a/cmake/dependencies/AMReX.cmake
+++ b/cmake/dependencies/AMReX.cmake
@@ -243,7 +243,7 @@ macro(find_amrex)
         endif()
         set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION})
 
-        find_package(AMReX 23.09 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS)
+        find_package(AMReX 23.10 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} TINYP LSOLVERS)
         message(STATUS "AMReX: Found version '${AMReX_VERSION}'")
     endif()
 endmacro()
@@ -257,7 +257,7 @@ set(WarpX_amrex_src ""
 set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git"
     CACHE STRING
     "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)")
-set(WarpX_amrex_branch "2e99628138df3b5b0ecf50b0c1201d5547f821a0"
+set(WarpX_amrex_branch "23.10"
     CACHE STRING
     "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)")
 
diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake
index a36ef59d1f9..2ff354785da 100644
--- a/cmake/dependencies/pyAMReX.cmake
+++ b/cmake/dependencies/pyAMReX.cmake
@@ -64,7 +64,7 @@ function(find_pyamrex)
         endif()
     elseif(NOT WarpX_pyamrex_internal)
         # TODO: MPI control
-        find_package(pyAMReX 23.07 CONFIG REQUIRED)
+        find_package(pyAMReX 23.10 CONFIG REQUIRED)
         message(STATUS "pyAMReX: Found version '${pyamrex_VERSION}'")
     endif()
 endfunction()
@@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON)
 set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git"
     CACHE STRING
     "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)")
-set(WarpX_pyamrex_branch "development"
+set(WarpX_pyamrex_branch "23.10"
     CACHE STRING
     "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)")
 
diff --git a/run_test.sh b/run_test.sh
index 81237fb6971..1c948e48713 100755
--- a/run_test.sh
+++ b/run_test.sh
@@ -71,7 +71,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt
 
 # Clone AMReX and warpx-data
 git clone https://github.com/AMReX-Codes/amrex.git
-cd amrex && git checkout --detach 2e99628138df3b5b0ecf50b0c1201d5547f821a0 && cd -
+cd amrex && git checkout --detach 23.10 && cd -
 # warpx-data contains various required data sets
 git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git
 # openPMD-example-datasets contains various required data sets
diff --git a/setup.py b/setup.py
index 91c8f1ac828..4636d6995f6 100644
--- a/setup.py
+++ b/setup.py
@@ -283,7 +283,7 @@ def build_extension(self, ext):
 setup(
     name='pywarpx',
     # note PEP-440 syntax: x.y.zaN but x.y.z.devN
-    version = '23.09',
+    version = '23.10',
     packages = ['pywarpx'],
     package_dir = {'pywarpx': 'Python/pywarpx'},
     author='Jean-Luc Vay, David P. Grote, Maxence Thévenet, Rémi Lehe, Andrew Myers, Weiqun Zhang, Axel Huebl, et al.',

From 3a06794120bd3c9af0f7d0a7c4c52154e1f10146 Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Mon, 9 Oct 2023 10:26:52 -0700
Subject: [PATCH 09/12] Add new PRL paper using WarpX (#4351)

* Add new PRL paper using WarpX

* Update PRAB citation
---
 Docs/source/highlights.rst | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Docs/source/highlights.rst b/Docs/source/highlights.rst
index dd9f6fd3e81..1e99629b9c2 100644
--- a/Docs/source/highlights.rst
+++ b/Docs/source/highlights.rst
@@ -14,6 +14,11 @@ Plasma-Based Acceleration
 
 Scientific works in laser-plasma and beam-plasma acceleration.
 
+#. Peng, H. and Huang, T. W. and Jiang, K. and Li, R. and Wu, C. N. and Yu, M. Y. and Riconda, C. and Weber, S. and Zhou, C. T. and Ruan, S. C.
+   **Coherent Subcycle Optical Shock from a Superluminal Plasma Wake**.
+   Phys. Rev. Lett. **131**, 145003, 2023
+   `DOI:10.1103/PhysRevLett.131.145003 <https://doi.org/10.1103/PhysRevLett.131.145003>`__
+
 #. Mewes SM, Boyle GJ, Ferran Pousa A, Shalloo RJ, Osterhoff J, Arran C, Corner L, Walczak R, Hooker SM, Thévenet M.
    **Demonstration of tunability of HOFI waveguides via start-to-end simulations**.
    Phys. Rev. Research **5**, 033112, 2023
@@ -27,8 +32,8 @@ Scientific works in laser-plasma and beam-plasma acceleration.
 
 #. Wang J, Zeng M, Li D, Wang X, Gao J.
    **High quality beam produced by tightly focused laser driven wakefield accelerators**.
-   arXiv pre-print, 2023.
-   `DOI:10.48550/arXiv.2304.10730 <https://doi.org/10.48550/arXiv.2304.10730>`__
+   Phys. Rev. Accel. Beams, **26**, 091303, 2023.
+   `DOI:10.1103/PhysRevAccelBeams.26.091303 <https://doi.org/10.1103/PhysRevAccelBeams.26.091303>`__
 
 #. Fedeli L, Huebl A, Boillod-Cerneux F, Clark T, Gott K, Hillairet C, Jaure S, Leblanc A, Lehe R, Myers A, Piechurski C, Sato M, Zaim N, Zhang W, Vay J-L, Vincenti H.
    **Pushing the Frontier in the Design of Laser-Based Electron Accelerators with Groundbreaking Mesh-Refined Particle-In-Cell Simulations on Exascale-Class Supercomputers**.

From 57a7a01d894bdfb17e45f0e8aa1fcb70380db6a4 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 10 Oct 2023 02:31:15 +0200
Subject: [PATCH 10/12] [pre-commit.ci] pre-commit autoupdate (#4352)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/pre-commit/pre-commit-hooks: v4.4.0 → v4.5.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.4.0...v4.5.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1c32c94c99c..bb8b6c8b33d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ exclude: '^share/openPMD/thirdParty'
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
     - id: trailing-whitespace
       args: [--markdown-linebreak-ext=md]

From 45569111e4e8b8e8d4174864999d1b11992f764e Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Mon, 9 Oct 2023 17:48:28 -0700
Subject: [PATCH 11/12] Lassen (LLNL): TOSS3 Setup (#4346)

* Lassen (LLNL): TOSS3 Setup

Add a TOSS3 section again.

* Fix SciPy and Finalize Python Env
---
 Docs/source/install/hpc/lassen.rst            | 168 +++++++++++++-----
 .../install_v100_dependencies_toss3.sh        | 135 ++++++++++++++
 .../lassen_v100_warpx_toss3.profile.example   |  54 ++++++
 3 files changed, 309 insertions(+), 48 deletions(-)
 create mode 100644 Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh
 create mode 100644 Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example

diff --git a/Docs/source/install/hpc/lassen.rst b/Docs/source/install/hpc/lassen.rst
index 5726254652a..ac15844a5ff 100644
--- a/Docs/source/install/hpc/lassen.rst
+++ b/Docs/source/install/hpc/lassen.rst
@@ -24,12 +24,27 @@ If you are new to this system, **please see the following resources**:
 Login
 -----
 
-.. note::
+.. tab-set::
 
-   Lassen is currently transitioning to RHEL8.
-   During this transition, first SSH into lassen and then ``ssh eatoss4`` next to work with the updated RHEL8/TOSS4 nodes.
+   .. tab-item:: TOSS4 (RHEL8)
 
-   Approximately October 2023, the new software environment on these nodes will be the new default.
+      Lassen is currently transitioning to RHEL8.
+      During this transition, first SSH into lassen and then to the updated RHEL8/TOSS4 nodes.
+
+      .. code-block:: bash
+
+         ssh lassen.llnl.gov
+         ssh eatoss4
+
+      Approximately October/November 2023, the new software environment on these nodes will be the new default.
+
+   .. tab-item:: TOSS3 (RHEL7)
+
+      .. code-block:: bash
+
+         ssh lassen.llnl.gov
+
+      Approximately October/November 2023, this partition will become TOSS4 (RHEL8) as well.
 
 
 .. _building-lassen-preparation:
@@ -43,78 +58,135 @@ Use the following commands to download the WarpX source code:
 
    git clone https://github.com/ECP-WarpX/WarpX.git /usr/workspace/${USER}/lassen/src/warpx
 
-We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx.profile``.
-Create it now:
+.. tab-set::
 
-.. code-block:: bash
+   .. tab-item:: TOSS4 (RHEL8)
 
-   cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example $HOME/lassen_v100_warpx.profile
+      We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx.profile``.
+      Create it now:
 
-.. dropdown:: Script Details
-   :color: light
-   :icon: info
-   :animate: fade-in-slide-down
+      .. code-block:: bash
 
-   .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example
-      :language: bash
+         cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example $HOME/lassen_v100_warpx.profile
 
-Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
-For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx.profile``.
-Enter the edit mode by typing ``i`` and edit line 2 to read:
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
 
-.. code-block:: bash
+         .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx.profile.example
+            :language: bash
 
-   export proj="nsldt"
+      Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+      For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx.profile``.
+      Enter the edit mode by typing ``i`` and edit line 2 to read:
 
-Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
+      .. code-block:: bash
 
-.. important::
+         export proj="nsldt"
 
-   Now, and as the first step on future logins to lassen, activate these environment settings:
+      Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
 
-   .. code-block:: bash
+      .. important::
+
+         Now, and as the first step on future logins to lassen, activate these environment settings:
+
+         .. code-block:: bash
+
+            source $HOME/lassen_v100_warpx.profile
+
+   .. tab-item:: TOSS3 (RHEL7)
+
+      We use system software modules, add environment hints and further dependencies via the file ``$HOME/lassen_v100_warpx_toss3.profile``.
+      Create it now:
+
+      .. code-block:: bash
+
+         cp /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example $HOME/lassen_v100_warpx_toss3.profile
+
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
 
-      source $HOME/lassen_v100_warpx.profile
+         .. literalinclude:: ../../../../Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example
+            :language: bash
+
+      Edit the 2nd line of this script, which sets the ``export proj=""`` variable.
+      For example, if you are member of the project ``nsldt``, then run ``vi $HOME/lassen_v100_warpx_toss3.profile``.
+      Enter the edit mode by typing ``i`` and edit line 2 to read:
+
+      .. code-block:: bash
+
+         export proj="nsldt"
+
+      Exit the ``vi`` editor with ``Esc`` and then type ``:wq`` (write & quit).
+
+      .. important::
+
+         Now, and as the first step on future logins to lassen, activate these environment settings:
+
+         .. code-block:: bash
+
+            source $HOME/lassen_v100_warpx_toss3.profile
 
 Finally, since lassen does not yet provide software modules for some of our dependencies, install them once:
 
-.. code-block:: bash
+.. tab-set::
 
-   bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies.sh
-   source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen/bin/activate
+   .. tab-item:: TOSS4 (RHEL8)
 
-.. dropdown:: Script Details
-   :color: light
-   :icon: info
-   :animate: fade-in-slide-down
+      .. code-block:: bash
 
-   .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies.sh
-      :language: bash
+         bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies.sh
+         source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen/bin/activate
 
-.. dropdown:: AI/ML Dependencies (Optional)
-   :animate: fade-in-slide-down
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
 
-   If you plan to run AI/ML workflows depending on pyTorch, run the next step as well.
-   This will take a while and should be skipped if not needed.
+         .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies.sh
+            :language: bash
 
-   .. code-block:: bash
+      .. dropdown:: AI/ML Dependencies (Optional)
+         :animate: fade-in-slide-down
 
-      runNode bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_ml.sh
+         If you plan to run AI/ML workflows depending on pyTorch, run the next step as well.
+         This will take a while and should be skipped if not needed.
 
-   .. dropdown:: Script Details
-      :color: light
-      :icon: info
-      :animate: fade-in-slide-down
+         .. code-block:: bash
 
-      .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_ml.sh
-         :language: bash
+            runNode bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_ml.sh
 
-   For `optimas dependencies <https://github.com/optimas-org/optimas>`__ (incl. scikit-learn), plan another hour of build time:
+         .. dropdown:: Script Details
+            :color: light
+            :icon: info
+            :animate: fade-in-slide-down
 
-   .. code-block:: bash
+            .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_ml.sh
+               :language: bash
+
+         For `optimas dependencies <https://github.com/optimas-org/optimas>`__ (incl. scikit-learn), plan another hour of build time:
+
+         .. code-block:: bash
+
+            python3 -m pip install -r /usr/workspace/${USER}/lassen/src/warpx/Tools/optimas/requirements.txt
+
+   .. tab-item:: TOSS3 (RHEL7)
+
+      .. code-block:: bash
+
+         bash /usr/workspace/${USER}/lassen/src/warpx/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh
+         source /usr/workspace/${USER}/lassen/gpu/venvs/warpx-lassen-toss3/bin/activate
 
-      python3 -m pip install -r /usr/workspace/${USER}/lassen/src/warpx/Tools/optimas/requirements.txt
+      .. dropdown:: Script Details
+         :color: light
+         :icon: info
+         :animate: fade-in-slide-down
 
+         .. literalinclude:: ../../../../Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh
+            :language: bash
 
 .. _building-lassen-compilation:
 
diff --git a/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh b/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh
new file mode 100644
index 00000000000..f62c6018cd5
--- /dev/null
+++ b/Tools/machines/lassen-llnl/install_v100_dependencies_toss3.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+#
+# Copyright 2023 The WarpX Community
+#
+# This file is part of WarpX.
+#
+# Author: Axel Huebl
+# License: BSD-3-Clause-LBNL
+
+# Exit on first error encountered #############################################
+#
+set -eu -o pipefail
+
+
+# Check: ######################################################################
+#
+#   Was lassen_v100_warpx.profile sourced and configured correctly?
+if [ -z ${proj-} ]; then echo "WARNING: The 'proj' variable is not yet set in your lassen_v100_warpx_toss3.profile file! Please edit its line 2 to continue!"; exit 1; fi
+
+
+# Remove old dependencies #####################################################
+#
+SRC_DIR="/usr/workspace/${USER}/lassen-toss3/src"
+SW_DIR="/usr/workspace/${USER}/lassen-toss3/gpu"
+rm -rf ${SW_DIR}
+mkdir -p ${SW_DIR}
+mkdir -p ${SRC_DIR}
+
+# remove common user mistakes in python, located in .local instead of a venv
+python3 -m pip uninstall -qq -y pywarpx
+python3 -m pip uninstall -qq -y warpx
+python3 -m pip uninstall -qqq -y mpi4py 2>/dev/null || true
+
+
+# General extra dependencies ##################################################
+#
+
+# tmpfs build directory: avoids issues often seen with $HOME and is faster
+build_dir=$(mktemp -d)
+
+# c-blosc (I/O compression)
+if [ -d ${SRC_DIR}/c-blosc ]
+then
+  cd ${SRC_DIR}/c-blosc
+  git fetch --prune
+  git checkout v1.21.1
+  cd -
+else
+  git clone -b v1.21.1 https://github.com/Blosc/c-blosc.git ${SRC_DIR}/c-blosc
+fi
+cmake -S ${SRC_DIR}/c-blosc -B ${build_dir}/c-blosc-lassen-build -DBUILD_TESTS=OFF -DBUILD_BENCHMARKS=OFF -DDEACTIVATE_AVX2=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/c-blosc-1.21.1
+cmake --build ${build_dir}/c-blosc-lassen-build --target install --parallel 10
+
+# HDF5
+if [ -d ${SRC_DIR}/hdf5 ]
+then
+  cd ${SRC_DIR}/hdf5
+  git fetch --prune
+  git checkout hdf5-1_14_1-2
+  cd -
+else
+  git clone -b hdf5-1_14_1-2 https://github.com/HDFGroup/hdf5.git ${SRC_DIR}/hdf5
+fi
+cmake -S ${SRC_DIR}/hdf5 -B ${build_dir}/hdf5-lassen-build -DBUILD_TESTING=OFF -DHDF5_ENABLE_PARALLEL=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/hdf5-1.14.1.2
+cmake --build ${build_dir}/hdf5-lassen-build --target install --parallel 10
+
+# ADIOS2
+if [ -d ${SRC_DIR}/adios2 ]
+then
+  cd ${SRC_DIR}/adios2
+  git fetch --prune
+  git checkout v2.8.3
+  cd -
+else
+  git clone -b v2.8.3 https://github.com/ornladios/ADIOS2.git ${SRC_DIR}/adios2
+fi
+cmake -S ${SRC_DIR}/adios2 -B ${build_dir}/adios2-lassen-build -DBUILD_TESTING=OFF -DADIOS2_BUILD_EXAMPLES=OFF -DADIOS2_USE_Blosc=ON -DADIOS2_USE_Fortran=OFF -DADIOS2_USE_Python=OFF -DADIOS2_USE_SST=OFF -DADIOS2_USE_ZeroMQ=OFF -DCMAKE_INSTALL_PREFIX=${SW_DIR}/adios2-2.8.3
+cmake --build ${build_dir}/adios2-lassen-build --target install -j 10
+
+# BLAS++ (for PSATD+RZ)
+if [ -d ${SRC_DIR}/blaspp ]
+then
+  cd ${SRC_DIR}/blaspp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/blaspp.git ${SRC_DIR}/blaspp
+fi
+cmake -S ${SRC_DIR}/blaspp -B ${build_dir}/blaspp-lassen-build -Duse_openmp=ON -Dgpu_backend=cuda -Duse_cmake_find_blas=ON -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=${SW_DIR}/blaspp-master
+cmake --build ${build_dir}/blaspp-lassen-build --target install --parallel 10
+
+# LAPACK++ (for PSATD+RZ)
+if [ -d ${SRC_DIR}/lapackpp ]
+then
+  cd ${SRC_DIR}/lapackpp
+  git fetch --prune
+  git checkout master
+  git pull
+  cd -
+else
+  git clone https://github.com/icl-utk-edu/lapackpp.git ${SRC_DIR}/lapackpp
+fi
+CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S ${SRC_DIR}/lapackpp -B ${build_dir}/lapackpp-lassen-build -Duse_cmake_find_lapack=ON -DCMAKE_CXX_STANDARD=17 -Dbuild_tests=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON -DCMAKE_INSTALL_PREFIX=${SW_DIR}/lapackpp-master -DLAPACK_LIBRARIES=/usr/lib64/liblapack.so
+cmake --build ${build_dir}/lapackpp-lassen-build --target install --parallel 10
+
+
+# Python ######################################################################
+#
+python3 -m pip install --upgrade --user virtualenv
+rm -rf ${SW_DIR}/venvs/warpx-lassen-toss3
+python3 -m venv ${SW_DIR}/venvs/warpx-lassen-toss3
+source ${SW_DIR}/venvs/warpx-lassen-toss3/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip cache purge
+python3 -m pip install --upgrade wheel
+python3 -m pip install --upgrade cython
+python3 -m pip install --upgrade numpy
+python3 -m pip install --upgrade pandas
+CMAKE_PREFIX_PATH=/usr/lib64:${CMAKE_PREFIX_PATH} python3 -m pip install --upgrade -Ccompile-args="-j10" -Csetup-args=-Dblas=BLAS -Csetup-args=-Dlapack=BLAS scipy
+python3 -m pip install --upgrade mpi4py --no-cache-dir --no-build-isolation --no-binary mpi4py
+python3 -m pip install --upgrade openpmd-api
+MPLLOCALFREETYPE=1 python3 -m pip install --upgrade matplotlib==3.2.2  # does not try to build freetype itself
+echo "matplotlib==3.2.2" > ${build_dir}/constraints.txt
+python3 -m pip install --upgrade -c ${build_dir}/constraints.txt yt
+
+# install or update WarpX dependencies such as picmistandard
+python3 -m pip install --upgrade -r ${SRC_DIR}/warpx/requirements.txt
+
+# for ML dependencies, see install_v100_ml.sh
+
+
+# remove build temporary directory
+rm -rf ${build_dir}
diff --git a/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example b/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example
new file mode 100644
index 00000000000..979c27989fa
--- /dev/null
+++ b/Tools/machines/lassen-llnl/lassen_v100_warpx_toss3.profile.example
@@ -0,0 +1,54 @@
+# please set your project account
+#export proj="<yourProjectNameHere>"  # edit this and comment in
+
+# required dependencies
+module load cmake/3.23.1
+module load gcc/11.2.1
+module load cuda/12.0.0
+
+# optional: for QED lookup table generation support
+module load boost/1.70.0
+
+# optional: for openPMD support
+SRC_DIR="/usr/workspace/${USER}/lassen-toss3/src"
+SW_DIR="/usr/workspace/${USER}/lassen-toss3/gpu"
+export CMAKE_PREFIX_PATH=${SW_DIR}/c-blosc-1.21.1:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/hdf5-1.14.1.2:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/adios2-2.8.3:$CMAKE_PREFIX_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/hdf5-1.14.1.2/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
+
+# optional: for PSATD in RZ geometry support
+export CMAKE_PREFIX_PATH=${SW_DIR}/blaspp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${SW_DIR}/lapackpp-master:$CMAKE_PREFIX_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/blaspp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${SW_DIR}/lapackpp-master/lib64:$LD_LIBRARY_PATH
+
+# optional: for Python bindings
+module load python/3.8.2
+
+if [ -d "${SW_DIR}/venvs/warpx-lassen-toss3" ]
+then
+    source ${SW_DIR}/venvs/warpx-lassen-toss3/bin/activate
+fi
+
+# optional: an alias to request an interactive node for two hours
+alias getNode="bsub -G $proj -W 2:00 -nnodes 1 -Is /bin/bash"
+# an alias to run a command on a batch node for up to 30min
+#   usage: runNode <command>
+alias runNode="bsub -q debug -P $proj -W 2:00 -nnodes 1 -I"
+
+# fix system defaults: do not escape $ with a \ on tab completion
+shopt -s direxpand
+
+# optimize CUDA compilation for V100
+export AMREX_CUDA_ARCH=7.0
+export CUDAARCHS=70
+
+# compiler environment hints
+export CC=$(which gcc)
+export CXX=$(which g++)
+export FC=$(which gfortran)
+export CUDACXX=$(which nvcc)
+export CUDAHOSTCXX=${CXX}

From b71b89c0fae093f9f552fc348b979d21e07b54c2 Mon Sep 17 00:00:00 2001
From: Remi Lehe <remi.lehe@normalesup.org>
Date: Tue, 10 Oct 2023 16:30:59 -0700
Subject: [PATCH 12/12] Fix issues with cupy-WarpX interoperability (#4348)

* Fix issues with cupy-WarpX interoperability

* Update cupy binding
---
 Python/pywarpx/fields.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/Python/pywarpx/fields.py b/Python/pywarpx/fields.py
index fd647287a53..5eeeda50ea7 100644
--- a/Python/pywarpx/fields.py
+++ b/Python/pywarpx/fields.py
@@ -298,10 +298,14 @@ def _get_field(self, mfi):
         # self.mf.array(mfi) is in C ordering.
         # Note: transposing creates a view and not a copy.
         device_arr4 = self.mf.array(mfi)
-        if cp is not None:
-            device_arr = cp.array(device_arr4, copy=False).T
+        if libwarpx.libwarpx_so.Config.have_gpu:
+            if cp is not None:
+                device_arr = device_arr4.to_cupy(copy=False)
+            else:
+                # Relies on managed memory
+                device_arr = device_arr4.to_numpy(copy=False)
         else:
-            device_arr = np.array(device_arr4, copy=False).T
+            device_arr = device_arr4.to_numpy(copy=False)
         if not self.include_ghosts:
             nghosts = self._get_n_ghosts()
             device_arr = device_arr[tuple([slice(ng, -ng) for ng in nghosts[:self.dim]])]
@@ -423,11 +427,10 @@ def __getitem__(self, index):
             if global_slices is not None:
                 # Note that the array will always have 4 dimensions.
                 device_arr = self._get_field(mfi)
-                if cp is not None:
-                    # Copy the data from the device to the host
-                    slice_arr = cp.asnumpy(device_arr[block_slices])
-                else:
-                    slice_arr = device_arr[block_slices]
+                slice_arr = device_arr[block_slices]
+                if (cp is not None) and (type(slice_arr) is cp.ndarray):
+                    # Copy data from host to device using cupy syntax
+                    slice_arr = slice_arr.get()
                 datalist.append((global_slices, slice_arr))
 
         # Gather the data from all processors
@@ -528,9 +531,9 @@ def __setitem__(self, index, value):
                 mf_arr = self._get_field(mfi)
                 if isinstance(value, np.ndarray):
                     slice_value = value3d[global_slices]
-                    if cp is not None:
+                    if libwarpx.libwarpx_so.Config.have_gpu:
                         # Copy data from host to device
-                        slice_value = cp.asarray(value3d[global_slices])
+                        slice_value = cp.asarray(slice_value)
                     mf_arr[block_slices] = slice_value
                 else:
                     mf_arr[block_slices] = value