From fd8b4b885c62e0c1f5e794907a1a124579179b4e Mon Sep 17 00:00:00 2001
From: Avigdor Veksler <124003120+aveksler1@users.noreply.github.com>
Date: Tue, 4 Jun 2024 20:34:08 -0700
Subject: [PATCH 01/11] Add upper bound for weight of product particles from
 particle resampling (#4969)

* add upper bound for weight parameter on particle merging

* increase readability, fix clang-tidy test

* rework from ignoring particles above some maximum weight to instead resample particles such that they stay under a target weight

* avoid implicit capture of this, cleanup

* remove leftover code

Co-authored-by: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com>

* clarify docstring

* clarifying comment

---------

Co-authored-by: Avigdor Veksler <aveksler@TAE7750-MLAP.local>
Co-authored-by: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com>
---
 Python/pywarpx/picmi.py                           |  5 +++++
 .../Resampling/VelocityCoincidenceThinning.H      |  1 +
 .../Resampling/VelocityCoincidenceThinning.cpp    | 15 +++++++++++++--
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index 34c21c2a34a..61fad85ec94 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -137,6 +137,9 @@ class Species(picmistandard.PICMI_Species):
         Cells with fewer particles than this number will be
         skipped during resampling.
 
+    warpx_resampling_algorithm_target_weight: float
+        Weight that the product particles from resampling will not exceed.
+
     warpx_resampling_trigger_intervals: bool, default=0
         Timesteps at which to resample
 
@@ -249,6 +252,7 @@ def init(self, kw):
         self.resampling_min_ppc = kw.pop('warpx_resampling_min_ppc', None)
         self.resampling_trigger_intervals = kw.pop('warpx_resampling_trigger_intervals', None)
         self.resampling_triggering_max_avg_ppc = kw.pop('warpx_resampling_trigger_max_avg_ppc', None)
+        self.resampling_algorithm_target_weight = kw.pop('warpx_resampling_algorithm_target_weight', None)
         self.resampling_algorithm_velocity_grid_type = kw.pop('warpx_resampling_algorithm_velocity_grid_type', None)
         self.resampling_algorithm_delta_ur = kw.pop('warpx_resampling_algorithm_delta_ur', None)
         self.resampling_algorithm_n_theta = kw.pop('warpx_resampling_algorithm_n_theta', None)
@@ -298,6 +302,7 @@ def species_initialize_inputs(self, layout,
                                              resampling_min_ppc=self.resampling_min_ppc,
                                              resampling_trigger_intervals=self.resampling_trigger_intervals,
                                              resampling_trigger_max_avg_ppc=self.resampling_triggering_max_avg_ppc,
+                                             resampling_algorithm_target_weight=self.resampling_algorithm_target_weight,
                                              resampling_algorithm_velocity_grid_type=self.resampling_algorithm_velocity_grid_type,
                                              resampling_algorithm_delta_ur=self.resampling_algorithm_delta_ur,
                                              resampling_algorithm_n_theta=self.resampling_algorithm_n_theta,
diff --git a/Source/Particles/Resampling/VelocityCoincidenceThinning.H b/Source/Particles/Resampling/VelocityCoincidenceThinning.H
index b8d67d198a4..bb325734777 100644
--- a/Source/Particles/Resampling/VelocityCoincidenceThinning.H
+++ b/Source/Particles/Resampling/VelocityCoincidenceThinning.H
@@ -200,5 +200,6 @@ private:
     int m_ntheta, m_nphi;
     amrex::ParticleReal m_delta_ur;
     amrex::Vector<amrex::ParticleReal> m_delta_u;
+    amrex::ParticleReal m_cluster_weight = std::numeric_limits<amrex::ParticleReal>::max();
 };
 #endif // WARPX_VELOCITY_COINCIDENCE_THINNING_H_
diff --git a/Source/Particles/Resampling/VelocityCoincidenceThinning.cpp b/Source/Particles/Resampling/VelocityCoincidenceThinning.cpp
index db4ddb801c6..2880ace200d 100644
--- a/Source/Particles/Resampling/VelocityCoincidenceThinning.cpp
+++ b/Source/Particles/Resampling/VelocityCoincidenceThinning.cpp
@@ -24,6 +24,14 @@ VelocityCoincidenceThinning::VelocityCoincidenceThinning (const std::string& spe
         "Resampling min_ppc should be greater than or equal to 1"
     );
 
+    amrex::ParticleReal target_weight = 0;
+    if (utils::parser::queryWithParser(
+        pp_species_name, "resampling_algorithm_target_weight", target_weight
+    )) {
+        // factor of 2 since each cluster is reduced to 2 particles
+        m_cluster_weight = target_weight * 2.0_prt;
+    }
+
     std::string velocity_grid_type_str = "spherical";
     pp_species_name.query(
         "resampling_algorithm_velocity_grid_type", velocity_grid_type_str
@@ -84,7 +92,7 @@ void VelocityCoincidenceThinning::operator() (WarpXParIter& pti, const int lev,
     auto *const cell_offsets = bins.offsetsPtr();
 
     const auto min_ppc = m_min_ppc;
-
+    const auto cluster_weight = m_cluster_weight;
     const auto mass = pc->getMass();
 
     // check if species mass > 0
@@ -207,10 +215,13 @@ void VelocityCoincidenceThinning::operator() (WarpXParIter& pti, const int lev,
                     ux[part_idx], uy[part_idx], uz[part_idx], mass
                 );
 
-                // check if this is the last particle in the current momentum bin
+                // check if this is the last particle in the current momentum bin,
+                // or if the next particle would push the current cluster weight
+                // to exceed the maximum specified cluster weight
                 if (
                     (i == cell_stop - 1)
                     || (momentum_bin_number_data[sorted_indices_data[i]] != momentum_bin_number_data[sorted_indices_data[i + 1]])
+                    || (total_weight + w[indices[sorted_indices_data[i+1]]] > cluster_weight)
                 ) {
                     // check if the bin has more than 2 particles in it
                     if ( particles_in_bin > 2 && total_weight > std::numeric_limits<amrex::ParticleReal>::min() ){

From 67419c6ad3edbbbfafb4d5e2ecbca4cece524bbe Mon Sep 17 00:00:00 2001
From: Roelof Groenewald <40245517+roelof-groenewald@users.noreply.github.com>
Date: Wed, 5 Jun 2024 19:15:10 -0700
Subject: [PATCH 02/11] Add initialization of extra attribs to picmi (#4972)

* add initialization of extra attribs to picmi

* avoid new `Bucket`

* update CI checksum

* update ionization test analysis to always include runtime attribute

* Update Examples Further, Fix CI

* update docstring; update checksum values

* allow user defined attributes to be added to `ParticleDiagnostic` output

* more CI fixes

* revert adding `z_orig` to inputs_2d_bf_rt

---------

Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
---
 .../laser_acceleration/PICMI_inputs_3d.py     |  4 ++-
 .../laser_acceleration/inputs_3d              |  2 +-
 .../laser_ion/PICMI_inputs_2d.py              |  5 ++--
 Examples/Tests/ionization/PICMI_inputs_2d.py  |  8 +++---
 .../Tests/ionization/analysis_ionization.py   |  9 ++++---
 Examples/Tests/ionization/inputs_2d_bf_rt     |  2 +-
 Python/pywarpx/picmi.py                       | 25 +++++++++++++++++++
 .../Python_LaserAcceleration.json             |  4 ++-
 .../benchmarks_json/Python_LaserIonAcc2d.json |  4 ++-
 .../benchmarks_json/Python_ionization.json    | 20 ++++++++-------
 10 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/Examples/Physics_applications/laser_acceleration/PICMI_inputs_3d.py b/Examples/Physics_applications/laser_acceleration/PICMI_inputs_3d.py
index ac3398e43fc..13bf492e203 100755
--- a/Examples/Physics_applications/laser_acceleration/PICMI_inputs_3d.py
+++ b/Examples/Physics_applications/laser_acceleration/PICMI_inputs_3d.py
@@ -55,7 +55,9 @@
 electrons = picmi.Species(
     particle_type = 'electron',
     name = 'electrons',
-    initial_distribution = uniform_distribution)
+    initial_distribution = uniform_distribution,
+    warpx_add_int_attributes = {'regionofinterest': "(z>12.0e-6) * (z<13.0e-6)"},
+    warpx_add_real_attributes = {'initialenergy': "ux*ux + uy*uy + uz*uz"})
 
 # Particles: beam electrons
 q_tot = 1e-12
diff --git a/Examples/Physics_applications/laser_acceleration/inputs_3d b/Examples/Physics_applications/laser_acceleration/inputs_3d
index bdcfd7676a4..fb8810dd34b 100644
--- a/Examples/Physics_applications/laser_acceleration/inputs_3d
+++ b/Examples/Physics_applications/laser_acceleration/inputs_3d
@@ -55,7 +55,7 @@ electrons.do_continuous_injection = 1
 electrons.addIntegerAttributes = regionofinterest
 electrons.attribute.regionofinterest(x,y,z,ux,uy,uz,t) = "(z>12.0e-6) * (z<13.0e-6)"
 electrons.addRealAttributes = initialenergy
-electrons.attribute.initialenergy(x,y,z,ux,uy,uz,t) = " ux*ux + uy*uy + uz*uz"
+electrons.attribute.initialenergy(x,y,z,ux,uy,uz,t) = "ux*ux + uy*uy + uz*uz"
 
 #################################
 ############ LASER  #############
diff --git a/Examples/Physics_applications/laser_ion/PICMI_inputs_2d.py b/Examples/Physics_applications/laser_ion/PICMI_inputs_2d.py
index 844501992c3..9f7a2aacfca 100755
--- a/Examples/Physics_applications/laser_ion/PICMI_inputs_2d.py
+++ b/Examples/Physics_applications/laser_ion/PICMI_inputs_2d.py
@@ -91,18 +91,17 @@
     rms_velocity=[c*ux_th, 0., c*uz_th]  # thermal velocity spread in m/s
 )
 
-# TODO: add additional attributes orig_x and orig_z
 electrons = picmi.Species(
     particle_type='electron',
     name='electrons',
     initial_distribution=slab_with_ramp_dist_electrons,
 )
 
-# TODO: add additional attributes orig_x and orig_z
 hydrogen = picmi.Species(
     particle_type='proton',
     name='hydrogen',
-    initial_distribution=slab_with_ramp_dist_hydrogen
+    initial_distribution=slab_with_ramp_dist_hydrogen,
+    warpx_add_real_attributes = {"orig_x": "x", "orig_z": "z"}
 )
 
 # Laser
diff --git a/Examples/Tests/ionization/PICMI_inputs_2d.py b/Examples/Tests/ionization/PICMI_inputs_2d.py
index a076361bf50..802bf5435ac 100644
--- a/Examples/Tests/ionization/PICMI_inputs_2d.py
+++ b/Examples/Tests/ionization/PICMI_inputs_2d.py
@@ -47,12 +47,14 @@
     fill_in = True)
 electrons = picmi.Species(
     particle_type = 'electron',
-    name = 'electrons')
+    name = 'electrons',
+    warpx_add_real_attributes = {'orig_z': 'z'})
 ions = picmi.Species(
     particle_type = 'N',
     name = 'ions',
     charge_state = 2,
-    initial_distribution = uniform_distribution)
+    initial_distribution = uniform_distribution,
+    warpx_add_real_attributes = {'orig_z': 'z'})
 
 # Field ionization
 nitrogen_ionization = picmi.FieldIonization(
@@ -88,7 +90,7 @@
     name = 'diag1',
     period = 10000,
     species = [electrons, ions],
-    data_list = ['ux', 'uy', 'uz', 'x', 'z', 'weighting'],
+    data_list = ['ux', 'uy', 'uz', 'x', 'z', 'weighting', 'orig_z'],
     write_dir = '.',
     warpx_file_prefix = 'Python_ionization_plt')
 field_diag = picmi.FieldDiagnostic(
diff --git a/Examples/Tests/ionization/analysis_ionization.py b/Examples/Tests/ionization/analysis_ionization.py
index 95732b03e36..90657915b50 100755
--- a/Examples/Tests/ionization/analysis_ionization.py
+++ b/Examples/Tests/ionization/analysis_ionization.py
@@ -52,7 +52,7 @@
     ad = ds.all_data()
 
     # Plot ions with ionization levels
-    species = 'ions';
+    species = 'ions'
     xi = ad[species, 'particle_position_x'].v
     zi = ad[species, 'particle_position_y'].v
     ii = ad[species, 'particle_ionizationLevel'].v
@@ -75,7 +75,7 @@
     plt.colorbar()
 
     # Plot electrons
-    species = 'electrons';
+    species = 'electrons'
     if species in [x[0] for x in ds.field_list]:
         xe = ad[species, 'particle_position_x'].v
         ze = ad[species, 'particle_position_y'].v
@@ -96,10 +96,11 @@
 # Check that the user runtime component (if it exists) worked as expected
 try:
     orig_z = ad['electrons', 'particle_orig_z'].v
-    assert np.all( (orig_z > 0) & (orig_z < 1.5e-5) )
+    print(f"orig_z: min = {np.min(orig_z)}, max = {np.max(orig_z)}")
+    assert np.all( (orig_z > 0.0) & (orig_z < 1.5e-5) )
     print('particle_orig_z has reasonable values')
 except yt.utilities.exceptions.YTFieldNotFound:
-    pass # Some of the tested script to not have the quantity orig_z
+    pass # The backtransformed diagnostic version of the test does not have orig_z
 
 test_name = os.path.split(os.getcwd())[1]
 checksumAPI.evaluate_checksum(test_name, filename)
diff --git a/Examples/Tests/ionization/inputs_2d_bf_rt b/Examples/Tests/ionization/inputs_2d_bf_rt
index 52af5d0d40f..8bcb66595d2 100644
--- a/Examples/Tests/ionization/inputs_2d_bf_rt
+++ b/Examples/Tests/ionization/inputs_2d_bf_rt
@@ -48,7 +48,7 @@ electrons.zmax =  50.e-6
 electrons.profile = constant
 electrons.density = 2.
 electrons.momentum_distribution_type = at_rest
-electrons.do_continuous_injection=1
+electrons.do_continuous_injection = 1
 
 lasers.names        = laser1
 laser1.profile      = Gaussian
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index 61fad85ec94..b981bc5e1d3 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -171,6 +171,14 @@ class Species(picmistandard.PICMI_Species):
         during grid-based merging, with `velocity_grid_type == "cartesian"`. If
         a single number is given the same du value will be used in all three
         directions.
+
+    warpx_add_int_attributes: dict
+        Dictionary of extra integer particle attributes initialized from an
+        expression that is a function of the variables (x, y, z, ux, uy, uz, t).
+
+    warpx_add_real_attributes: dict
+        Dictionary of extra real particle attributes initialized from an
+        expression that is a function of the variables (x, y, z, ux, uy, uz, t).
     """
     def init(self, kw):
 
@@ -261,6 +269,10 @@ def init(self, kw):
         if self.resampling_algorithm_delta_u is not None and np.size(self.resampling_algorithm_delta_u) == 1:
             self.resampling_algorithm_delta_u = [self.resampling_algorithm_delta_u]*3
 
+        # extra particle attributes
+        self.extra_int_attributes = kw.pop('warpx_add_int_attributes', None)
+        self.extra_real_attributes = kw.pop('warpx_add_real_attributes', None)
+
     def species_initialize_inputs(self, layout,
                                   initialize_self_fields = False,
                                   injection_plane_position = None,
@@ -318,6 +330,16 @@ def species_initialize_inputs(self, layout,
         self.species.add_new_attr("reflection_model_zhi(E)", self.reflection_model_zhi)
         # self.species.add_new_attr("reflection_model_eb(E)", self.reflection_model_eb)
 
+        # extra particle attributes
+        if self.extra_int_attributes is not None:
+            self.species.addIntegerAttributes = self.extra_int_attributes.keys()
+            for attr, function in self.extra_int_attributes.items():
+                self.species.add_new_attr('attribute.'+attr+'(x,y,z,ux,uy,uz,t)', function)
+        if self.extra_real_attributes is not None:
+            self.species.addRealAttributes = self.extra_real_attributes.keys()
+            for attr, function in self.extra_real_attributes.items():
+                self.species.add_new_attr('attribute.'+attr+'(x,y,z,ux,uy,uz,t)', function)
+
         pywarpx.Particles.particles_list.append(self.species)
 
         if self.initial_distribution is not None:
@@ -2596,6 +2618,9 @@ def diagnostic_initialize_inputs(self):
                         )
                     else:
                         variables.add(dataname)
+                else:
+                    # possibly add user defined attributes
+                    variables.add(dataname)
 
             # --- Convert the set to a sorted list so that the order
             # --- is the same on all processors.
diff --git a/Regression/Checksum/benchmarks_json/Python_LaserAcceleration.json b/Regression/Checksum/benchmarks_json/Python_LaserAcceleration.json
index 08969db023e..474e812480f 100644
--- a/Regression/Checksum/benchmarks_json/Python_LaserAcceleration.json
+++ b/Regression/Checksum/benchmarks_json/Python_LaserAcceleration.json
@@ -21,12 +21,14 @@
     "particle_weight": 6241509.074460764
   },
   "electrons": {
+    "particle_initialenergy": 0.0,
     "particle_momentum_x": 1.7921232210868553e-20,
     "particle_momentum_y": 7.225819896136567e-20,
     "particle_momentum_z": 4.2317254599358777e-20,
     "particle_position_x": 0.713912262116188,
     "particle_position_y": 0.7150340887578024,
     "particle_position_z": 1.31757706006908,
+    "particle_regionofinterest": 1936.0,
     "particle_weight": 12926557617.187498
   }
-}
\ No newline at end of file
+}
diff --git a/Regression/Checksum/benchmarks_json/Python_LaserIonAcc2d.json b/Regression/Checksum/benchmarks_json/Python_LaserIonAcc2d.json
index baaf29bec59..d2fd4841cc4 100644
--- a/Regression/Checksum/benchmarks_json/Python_LaserIonAcc2d.json
+++ b/Regression/Checksum/benchmarks_json/Python_LaserIonAcc2d.json
@@ -23,6 +23,8 @@
     "particle_weight": 2.6507336926909222e+17
   },
   "hydrogen": {
+    "particle_origX": 0.008198291015625001,
+    "particle_origZ": 0.0365664599609375,
     "particle_position_x": 0.008197892199782453,
     "particle_position_y": 0.0,
     "particle_position_z": 0.0365646600930625,
@@ -31,4 +33,4 @@
     "particle_momentum_z": 1.0873094324185116e-18,
     "particle_weight": 2.703612070965676e+17
   }
-}
\ No newline at end of file
+}
diff --git a/Regression/Checksum/benchmarks_json/Python_ionization.json b/Regression/Checksum/benchmarks_json/Python_ionization.json
index 31f426aa362..a5e65fcf765 100644
--- a/Regression/Checksum/benchmarks_json/Python_ionization.json
+++ b/Regression/Checksum/benchmarks_json/Python_ionization.json
@@ -10,21 +10,23 @@
     "jy": 0.0,
     "jz": 1.3483401471475687e-07
   },
-  "electrons": {
-    "particle_momentum_x": 4.4206237143449475e-18,
-    "particle_momentum_y": 0.0,
-    "particle_momentum_z": 2.6361297302081026e-18,
-    "particle_position_x": 0.11009154442846772,
-    "particle_position_y": 0.6414658436421568,
-    "particle_weight": 3.4450781249999996e-10
-  },
   "ions": {
     "particle_ionizationLevel": 72897.0,
     "particle_momentum_x": 1.76132401934254e-18,
     "particle_momentum_y": 0.0,
     "particle_momentum_z": 3.644887053263054e-23,
+    "particle_orig_z": 0.128,
     "particle_position_x": 0.03200001189420337,
     "particle_position_y": 0.1280000046901387,
     "particle_weight": 9.999999999999999e-11
+  },
+  "electrons": {
+    "particle_momentum_x": 4.4206237143449475e-18,
+    "particle_momentum_y": 0.0,
+    "particle_momentum_z": 2.6361297302081026e-18,
+    "particle_orig_z": 0.4305565137391907,
+    "particle_position_x": 0.11009154442846772,
+    "particle_position_y": 0.6414658436421568,
+    "particle_weight": 3.4450781249999996e-10
   }
-}
\ No newline at end of file
+}

From dc2c2f690c85eb648036c0710fd773972b9bfde4 Mon Sep 17 00:00:00 2001
From: David Grote <grote1@llnl.gov>
Date: Fri, 7 Jun 2024 11:16:23 -0700
Subject: [PATCH 03/11] Add temperature diagnostic (#4942)

* Add temperature diagnostic

* Add to CI test

* Add input to picmi

* Add comment on the method

* const cleanup

* Fix particle array declaration

* Update benchmark for CI
---
 Docs/source/usage/parameters.rst              |   4 +-
 Examples/Tests/collision/inputs_3d            |   2 +-
 Python/pywarpx/picmi.py                       |   3 +
 .../benchmarks_json/collisionXYZ.json         |   4 +-
 .../ComputeDiagFunctors/CMakeLists.txt        |   1 +
 .../ComputeDiagFunctors/Make.package          |   1 +
 .../ComputeDiagFunctors/TemperatureFunctor.H  |  35 ++++
 .../TemperatureFunctor.cpp                    | 165 ++++++++++++++++++
 Source/Diagnostics/Diagnostics.H              |   2 +
 Source/Diagnostics/Diagnostics.cpp            |  24 +++
 Source/Diagnostics/FullDiagnostics.cpp        |  17 ++
 11 files changed, 255 insertions(+), 3 deletions(-)
 create mode 100644 Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.H
 create mode 100644 Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.cpp

diff --git a/Docs/source/usage/parameters.rst b/Docs/source/usage/parameters.rst
index fb53e81012f..a3ca703d15e 100644
--- a/Docs/source/usage/parameters.rst
+++ b/Docs/source/usage/parameters.rst
@@ -2693,7 +2693,9 @@ In-situ capabilities can be used by turning on Sensei or Ascent (provided they a
 
 * ``<diag_name>.fields_to_plot`` (list of `strings`, optional)
     Fields written to output.
-    Possible scalar fields: ``part_per_cell`` ``rho`` ``phi`` ``F`` ``part_per_grid`` ``divE`` ``divB`` and ``rho_<species_name>``, where ``<species_name>`` must match the name of one of the available particle species. Note that ``phi`` will only be written out when do_electrostatic==labframe. Also, note that for ``<diag_name>.diag_type = BackTransformed``, the only scalar field currently supported is ``rho``.
+    Possible scalar fields: ``part_per_cell`` ``rho`` ``phi`` ``F`` ``part_per_grid`` ``divE`` ``divB`` ``rho_<species_name>`` and ``T_<species_name>``, where ``<species_name>`` must match the name of one of the available particle species.
+    ``T_<species_name>`` is the temperature in eV.
+    Note that ``phi`` will only be written out when do_electrostatic==labframe. Also, note that for ``<diag_name>.diag_type = BackTransformed``, the only scalar field currently supported is ``rho``.
     Possible vector field components in Cartesian geometry: ``Ex`` ``Ey`` ``Ez`` ``Bx`` ``By`` ``Bz`` ``jx`` ``jy`` ``jz``.
     Possible vector field components in RZ geometry: ``Er`` ``Et`` ``Ez`` ``Br`` ``Bt`` ``Bz`` ``jr`` ``jt`` ``jz``.
     The default is ``<diag_name>.fields_to_plot = Ex Ey Ez Bx By Bz jx jy jz`` in Cartesian geometry and ``<diag_name>.fields_to_plot = Er Et Ez Br Bt Bz jr jt jz`` in RZ geometry.
diff --git a/Examples/Tests/collision/inputs_3d b/Examples/Tests/collision/inputs_3d
index 3cc06061bed..ed413ba2776 100644
--- a/Examples/Tests/collision/inputs_3d
+++ b/Examples/Tests/collision/inputs_3d
@@ -75,7 +75,7 @@ collision3.ndt = 10
 diagnostics.diags_names = diag1 diag_parser_filter diag_uniform_filter diag_random_filter
 diag1.intervals = 10
 diag1.diag_type = Full
-diag1.fields_to_plot = Ex Ey Ez Bx By Bz
+diag1.fields_to_plot = Ex Ey Ez Bx By Bz T_electron T_ion
 
 ## diag_parser_filter is a diag used to test the particle filter function.
 diag_parser_filter.intervals = 150:150:
diff --git a/Python/pywarpx/picmi.py b/Python/pywarpx/picmi.py
index b981bc5e1d3..0048cb1e51b 100644
--- a/Python/pywarpx/picmi.py
+++ b/Python/pywarpx/picmi.py
@@ -2411,6 +2411,9 @@ def diagnostic_initialize_inputs(self):
                 elif dataname.startswith('rho_'):
                     # Adds rho_species diagnostic
                     fields_to_plot.add(dataname)
+                elif dataname.startswith('T_'):
+                    # Adds T_species diagnostic
+                    fields_to_plot.add(dataname)
                 elif dataname == 'dive':
                     fields_to_plot.add('divE')
                 elif dataname == 'divb':
diff --git a/Regression/Checksum/benchmarks_json/collisionXYZ.json b/Regression/Checksum/benchmarks_json/collisionXYZ.json
index 927848745a8..6e4b9abf965 100644
--- a/Regression/Checksum/benchmarks_json/collisionXYZ.json
+++ b/Regression/Checksum/benchmarks_json/collisionXYZ.json
@@ -5,6 +5,8 @@
     "Bz": 0.0,
     "Ex": 0.0,
     "Ey": 0.0,
-    "Ez": 0.0
+    "Ez": 0.0,
+    "T_electron": 362230.52300397365,
+    "T_ion": 338312.83502136066
   }
 }
diff --git a/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt b/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
index 5e0eeaab73a..2a5cc87c0cb 100644
--- a/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
+++ b/Source/Diagnostics/ComputeDiagFunctors/CMakeLists.txt
@@ -13,5 +13,6 @@ foreach(D IN LISTS WarpX_DIMS)
         BackTransformFunctor.cpp
         BackTransformParticleFunctor.cpp
         ParticleReductionFunctor.cpp
+        TemperatureFunctor.cpp
     )
 endforeach()
diff --git a/Source/Diagnostics/ComputeDiagFunctors/Make.package b/Source/Diagnostics/ComputeDiagFunctors/Make.package
index fd1624b8708..0fd618748e3 100644
--- a/Source/Diagnostics/ComputeDiagFunctors/Make.package
+++ b/Source/Diagnostics/ComputeDiagFunctors/Make.package
@@ -9,5 +9,6 @@ CEXE_sources += RhoFunctor.cpp
 CEXE_sources += BackTransformFunctor.cpp
 CEXE_sources += BackTransformParticleFunctor.cpp
 CEXE_sources += ParticleReductionFunctor.cpp
+CEXE_sources += TemperatureFunctor.cpp
 
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Diagnostics/ComputeDiagFunctors
diff --git a/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.H b/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.H
new file mode 100644
index 00000000000..f6c425e74d5
--- /dev/null
+++ b/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.H
@@ -0,0 +1,35 @@
+#ifndef WARPX_TEMPERATUREFUNCTOR_H_
+#define WARPX_TEMPERATUREFUNCTOR_H_
+
+#include "ComputeDiagFunctor.H"
+
+#include <AMReX_BaseFwd.H>
+
+/**
+ * \brief Functor to calculate per-cell averages of particle temperature
+ */
+class TemperatureFunctor final : public ComputeDiagFunctor
+{
+public:
+    /** \brief Constructor
+     * \param[in] lev level on which the temperature is computed
+     * \param[in] crse_ratio coarsening ratio used when sampling the computed data
+                  onto the output diagnostic MultiFab mf_dst
+     * \param[in] ispec index of the species over which to calculate the temperature
+     * \param[in] ncomp number of output components; must be 1 (asserted in the constructor)
+     */
+    TemperatureFunctor(int lev, amrex::IntVect crse_ratio, int ispec, int ncomp=1);
+
+    /** \brief Compute the temperature in each grid cell.
+     *
+     * \param[out] mf_dst output MultiFab where the result is written
+     * \param[in] dcomp component of mf_dst in which the cell-centered
+     *            temperature is stored
+     */
+    void operator()(amrex::MultiFab& mf_dst, int dcomp, int /*i_buffer=0*/) const override;
+private:
+    int const m_lev; /**< level on which the temperature is computed */
+    int const m_ispec; /**< index of the species whose temperature is computed */
+};
+
+#endif // WARPX_TEMPERATUREFUNCTOR_H_
diff --git a/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.cpp b/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.cpp
new file mode 100644
index 00000000000..c42f8970d5e
--- /dev/null
+++ b/Source/Diagnostics/ComputeDiagFunctors/TemperatureFunctor.cpp
@@ -0,0 +1,165 @@
+
+#include "TemperatureFunctor.H"
+
+#include "Diagnostics/ComputeDiagFunctors/ComputeDiagFunctor.H"
+#include "Particles/MultiParticleContainer.H"
+#include "Particles/WarpXParticleContainer.H"
+#include "Utils/Parser/ParserUtils.H"
+#include "WarpX.H"
+
+#include <ablastr/coarsen/sample.H>
+
+#include <AMReX_Array.H>
+#include <AMReX_BLassert.H>
+#include <AMReX_IntVect.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_REAL.H>
+
+TemperatureFunctor::TemperatureFunctor (const int lev,
+        const amrex::IntVect crse_ratio, const int ispec, const int ncomp)
+    : ComputeDiagFunctor(ncomp, crse_ratio), m_lev(lev), m_ispec(ispec)
+{
+    // This functor writes exactly one output component (the temperature).
+    AMREX_ALWAYS_ASSERT(ncomp == 1);
+}
+
+void
+TemperatureFunctor::operator() (amrex::MultiFab& mf_dst, const int dcomp, const int /*i_buffer*/) const
+{
+    using namespace amrex::literals;
+    auto& warpx = WarpX::GetInstance();
+
+    // The number of guard cells is set to 1 for generality. However, for a cell-centered
+    // output MultiFab, mf_dst, the guard-cell data is not needed, especially considering
+    // the operations performed in the Coarsen call at the end of this function.
+    constexpr int ng = 1;
+
+    // Temporary cell-centered MultiFab: comp 0 = sum of weights (then T), 1-3 = <u>, 4-6 = <(u-<u>)**2>
+    amrex::MultiFab sum_mf(warpx.boxArray(m_lev), warpx.DistributionMap(m_lev), 7, ng);
+
+    auto& pc = warpx.GetPartContainer().GetParticleContainer(m_ispec);
+    amrex::Real const mass = pc.getMass();  // Note, implicit conversion from ParticleReal
+
+    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(mass > 0.,
+        "The temperature diagnostic can not be calculated for a massless species.");
+
+    // Calculate the averages in two steps: first the weighted average velocity <u>, then
+    // the weighted average of the squared deviation <(u - <u>)**2>. This method is more
+    // robust than the single step using <u**2> - <u>**2 when <u> >> u_rms.
+    ParticleToMesh(pc, sum_mf, m_lev,
+            [=] AMREX_GPU_DEVICE (const WarpXParticleContainer::SuperParticleType& p,
+                amrex::Array4<amrex::Real> const& out_array,
+                amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
+                amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi)
+            {
+                // Get position in AMReX convention to calculate corresponding index.
+                // Ideally this will be replaced with the AMReX NGP interpolator
+                // Always do x direction.
+                int ii = 0, jj = 0, kk = 0;
+                const amrex::ParticleReal x = p.pos(0);
+                const amrex::Real lx = (x - plo[0]) * dxi[0];
+                ii = static_cast<int>(amrex::Math::floor(lx));
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_3D) || defined(WARPX_DIM_RZ)
+                const amrex::ParticleReal y = p.pos(1);
+                const amrex::Real ly = (y - plo[1]) * dxi[1];
+                jj = static_cast<int>(amrex::Math::floor(ly));
+#endif
+#if defined(WARPX_DIM_3D)
+                const amrex::ParticleReal z = p.pos(2);
+                const amrex::Real lz = (z - plo[2]) * dxi[2];
+                kk = static_cast<int>(amrex::Math::floor(lz));
+#endif
+
+                const amrex::ParticleReal w  = p.rdata(PIdx::w);
+                const amrex::ParticleReal ux = p.rdata(PIdx::ux);
+                const amrex::ParticleReal uy = p.rdata(PIdx::uy);
+                const amrex::ParticleReal uz = p.rdata(PIdx::uz);
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 0), (amrex::Real)(w));
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 1), (amrex::Real)(w*ux));
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 2), (amrex::Real)(w*uy));
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 3), (amrex::Real)(w*uz));
+            });
+
+    // Divide the weighted sums by the sum of the weights to get the average <u> per cell
+    for (amrex::MFIter mfi(sum_mf, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi)
+    {
+        const amrex::Box& box = mfi.tilebox();
+        amrex::Array4<amrex::Real> const& out_array = sum_mf.array(mfi);
+        amrex::ParallelFor(box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+                    if (out_array(i,j,k,0) > 0) {
+                        const amrex::Real invsum = 1._rt/out_array(i,j,k,0);
+                        out_array(i,j,k,1) *= invsum;
+                        out_array(i,j,k,2) *= invsum;
+                        out_array(i,j,k,3) *= invsum;
+                    }
+                });
+    }
+
+    // Accumulate the weighted sum of squares of (u - <u>) into components 4-6.
+    // These loops must be written out since ParticleToMesh always zeros out the mf.
+    const auto plo = pc.Geom(m_lev).ProbLoArray();
+    const auto dxi = pc.Geom(m_lev).InvCellSizeArray();
+    for (WarpXParIter pti(pc, m_lev); pti.isValid(); ++pti)
+    {
+        const long np = pti.numParticles();
+        amrex::ParticleReal* wp = pti.GetAttribs(PIdx::w).dataPtr();
+        amrex::ParticleReal* uxp = pti.GetAttribs(PIdx::ux).dataPtr();
+        amrex::ParticleReal* uyp = pti.GetAttribs(PIdx::uy).dataPtr();
+        amrex::ParticleReal* uzp = pti.GetAttribs(PIdx::uz).dataPtr();
+
+        auto const GetPosition = GetParticlePosition<PIdx>(pti);
+
+        amrex::Array4<amrex::Real> const& out_array = sum_mf.array(pti);
+
+        amrex::ParallelFor(np,
+            [=] AMREX_GPU_DEVICE (long ip) {
+                // --- Get particle quantities (NOTE(review): confirm xp matches AMReX pos(0) in 1D builds)
+                amrex::ParticleReal xp, yp, zp;
+                GetPosition.AsStored(ip, xp, yp, zp);
+
+                // Get position in AMReX convention to calculate corresponding index.
+                int ii = 0, jj = 0, kk = 0;
+                const amrex::Real lx = (xp - plo[0]) * dxi[0];
+                ii = static_cast<int>(amrex::Math::floor(lx));
+#if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
+                const amrex::Real lz = (zp - plo[1]) * dxi[1];
+                jj = static_cast<int>(amrex::Math::floor(lz));
+#elif defined(WARPX_DIM_3D)
+                const amrex::Real ly = (yp - plo[1]) * dxi[1];
+                jj = static_cast<int>(amrex::Math::floor(ly));
+                const amrex::Real lz = (zp - plo[2]) * dxi[2];
+                kk = static_cast<int>(amrex::Math::floor(lz));
+#endif
+
+                const amrex::ParticleReal w  = wp[ip];
+                const amrex::ParticleReal ux = uxp[ip] - out_array(ii, jj, kk, 1);
+                const amrex::ParticleReal uy = uyp[ip] - out_array(ii, jj, kk, 2);
+                const amrex::ParticleReal uz = uzp[ip] - out_array(ii, jj, kk, 3);
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 4), (amrex::Real)(w*ux*ux));
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 5), (amrex::Real)(w*uy*uy));
+                amrex::Gpu::Atomic::AddNoRet(&out_array(ii, jj, kk, 6), (amrex::Real)(w*uz*uz));
+            });
+    }
+
+    // Average the squares by the sum of the weights; the temperature overwrites component 0
+    for (amrex::MFIter mfi(sum_mf, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi)
+    {
+        const amrex::Box& box = mfi.tilebox();
+        amrex::Array4<amrex::Real> const& out_array = sum_mf.array(mfi);
+        amrex::ParallelFor(box,
+                [=] AMREX_GPU_DEVICE (int i, int j, int k) {
+                    if (out_array(i,j,k,0) > 0) {
+                        const amrex::Real invsum = 1._rt/out_array(i,j,k,0);
+                        out_array(i,j,k,4) *= invsum;
+                        out_array(i,j,k,5) *= invsum;
+                        out_array(i,j,k,6) *= invsum;
+                        out_array(i,j,k,0) = mass*(out_array(i,j,k,4) + out_array(i,j,k,5) + out_array(i,j,k,6))/(3._rt*PhysConst::q_e);
+                    }
+                });
+    }
+
+    // Coarsen from sum_mf (starting at component 0) to the output diagnostic MultiFab, mf_dst.
+    ablastr::coarsen::sample::Coarsen(mf_dst, sum_mf, dcomp, 0, nComp(), 0, m_crse_ratio);
+
+}
diff --git a/Source/Diagnostics/Diagnostics.H b/Source/Diagnostics/Diagnostics.H
index c0d2a9f0d53..20550364fb7 100644
--- a/Source/Diagnostics/Diagnostics.H
+++ b/Source/Diagnostics/Diagnostics.H
@@ -304,6 +304,8 @@ protected:
     int m_num_buffers;
     /** Array of species indices that dump rho per species */
     amrex::Vector<int> m_rho_per_species_index;
+    /** Array of species indices that dump temperature per species */
+    amrex::Vector<int> m_T_per_species_index;
     /** Vector of particle buffer vectors for each snapshot */
     amrex::Vector< amrex::Vector<std::unique_ptr<PinnedMemoryParticleContainer> > > m_particles_buffer;
     /** Vector of pointers to functors to compute particle output per species*/
diff --git a/Source/Diagnostics/Diagnostics.cpp b/Source/Diagnostics/Diagnostics.cpp
index 3b5daabaffa..f6e2da74127 100644
--- a/Source/Diagnostics/Diagnostics.cpp
+++ b/Source/Diagnostics/Diagnostics.cpp
@@ -278,6 +278,30 @@ Diagnostics::BaseReadParameters ()
                 + ".fields_to_plot does not match any species"
             );
         }
+        // Check if m_varnames contains a string of the form T_<species_name>
+        if (var.rfind("T_", 0) == 0) {
+            // Extract species name from the string T_<species_name>
+            const std::string species = var.substr(var.find("T_") + 2);
+            // Boolean used to check if species name was misspelled
+            bool species_name_is_wrong = true;
+            // Loop over all species
+            for (int i = 0, n = int(m_all_species_names.size()); i < n; i++) {
+                // Check if species name extracted from the string T_<species_name>
+                // matches any of the species in the simulation
+                if (species == m_all_species_names[i]) {
+                    // Store species index: will be used in TemperatureFunctor to dump
+                    // T for this species
+                    m_T_per_species_index.push_back(i);
+                    species_name_is_wrong = false;
+                }
+            }
+            // If species name was misspelled, abort with error message
+            WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
+                !species_name_is_wrong,
+                "Input error: string " + var + " in " + m_diag_name
+                + ".fields_to_plot does not match any species"
+            );
+        }
     }
 
     const bool checkpoint_compatibility = (
diff --git a/Source/Diagnostics/FullDiagnostics.cpp b/Source/Diagnostics/FullDiagnostics.cpp
index b25f899e29d..55af73d6408 100644
--- a/Source/Diagnostics/FullDiagnostics.cpp
+++ b/Source/Diagnostics/FullDiagnostics.cpp
@@ -8,6 +8,7 @@
 #include "ComputeDiagFunctors/PartPerCellFunctor.H"
 #include "ComputeDiagFunctors/PartPerGridFunctor.H"
 #include "ComputeDiagFunctors/ParticleReductionFunctor.H"
+#include "ComputeDiagFunctors/TemperatureFunctor.H"
 #include "ComputeDiagFunctors/RhoFunctor.H"
 #include "Diagnostics/Diagnostics.H"
 #include "Diagnostics/ParticleDiag/ParticleDiag.H"
@@ -186,6 +187,8 @@ FullDiagnostics::InitializeFieldFunctorsRZopenPMD (int lev)
 
     // Species index to loop over species that dump rho per species
     int i = 0;
+    // Species index to loop over species that dump temperature per species
+    int i_T_species = 0;
     const int ncomp = ncomp_multimodefab;
     // This function is called multiple times, for different values of `lev`
     // but the `varnames` need only be updated once.
@@ -303,6 +306,13 @@ FullDiagnostics::InitializeFieldFunctorsRZopenPMD (int lev)
                 AddRZModesToOutputNames(std::string("rho_") + m_all_species_names[m_rho_per_species_index[i]], ncomp);
             }
             i++;
+        } else if ( m_varnames_fields[comp].rfind("T_", 0) == 0 ){
+            // Initialize temperature functor to dump temperature per species
+            m_all_field_functors[lev][comp] = std::make_unique<TemperatureFunctor>(lev, m_crse_ratio, m_T_per_species_index[i_T_species]);
+            if (update_varnames) {
+                AddRZModesToOutputNames(std::string("T_") + m_all_species_names[m_T_per_species_index[i_T_species]], ncomp);
+            }
+            i_T_species++;
         } else if ( m_varnames_fields[comp] == "F" ){
             m_all_field_functors[lev][comp] = std::make_unique<CellCenterFunctor>(warpx.getFieldPointer(FieldType::F_fp, lev), lev, m_crse_ratio,
                                                         false, ncomp);
@@ -637,6 +647,9 @@ FullDiagnostics::InitializeFieldFunctors (int lev)
     // Species index to loop over species that dump rho per species
     int i = 0;
 
+    // Species index to loop over species that dump temperature per species
+    int i_T_species = 0;
+
     const auto nvar = static_cast<int>(m_varnames_fields.size());
     const auto nspec = static_cast<int>(m_pfield_species.size());
     const auto ntot = static_cast<int>(nvar + m_pfield_varnames.size() * nspec);
@@ -666,6 +679,10 @@ FullDiagnostics::InitializeFieldFunctors (int lev)
             // Initialize rho functor to dump rho per species
             m_all_field_functors[lev][comp] = std::make_unique<RhoFunctor>(lev, m_crse_ratio, true, m_rho_per_species_index[i]);
             i++;
+        } else if ( m_varnames[comp].rfind("T_", 0) == 0 ){
+            // Initialize temperature functor to dump temperature per species
+            m_all_field_functors[lev][comp] = std::make_unique<TemperatureFunctor>(lev, m_crse_ratio, m_T_per_species_index[i_T_species]);
+            i_T_species++;
         } else if ( m_varnames[comp] == "F" ){
             m_all_field_functors[lev][comp] = std::make_unique<CellCenterFunctor>(warpx.getFieldPointer(FieldType::F_fp, lev), lev, m_crse_ratio);
         } else if ( m_varnames[comp] == "G" ){

From e80ca983aa27b023b0528c0ed2875a5745ea6599 Mon Sep 17 00:00:00 2001
From: David Grote <grote1@llnl.gov>
Date: Fri, 7 Jun 2024 17:54:08 -0700
Subject: [PATCH 04/11] Cleanup diagnostic dimension macros (#4973)

* Clean up BeamRelevant diagnostics, using get_particle_position

* In Field diagnostics, simplify calculation of dV

* Clean up of ParticleExtrema

* In WarpXOpenPMD, removed duplicate particle coordinate transformation with RZ

* Added more amrex prefixes in ParticleExtrema

* Fix const

* Small cleanup in WarpXOpenPMD

* Clean up calculation of dV

* Cleanup call to CellSize

* Fix in FieldEnergy
---
 .../Diagnostics/ReducedDiags/BeamRelevant.cpp |  54 +---
 .../Diagnostics/ReducedDiags/FieldEnergy.cpp  |  14 +-
 .../ReducedDiags/FieldMomentum.cpp            |  12 +-
 .../ReducedDiags/ParticleExtrema.cpp          | 271 ++++++------------
 Source/Diagnostics/WarpXOpenPMD.cpp           |  43 +--
 5 files changed, 119 insertions(+), 275 deletions(-)

diff --git a/Source/Diagnostics/ReducedDiags/BeamRelevant.cpp b/Source/Diagnostics/ReducedDiags/BeamRelevant.cpp
index ae7d5230b4c..9b5a28a3516 100644
--- a/Source/Diagnostics/ReducedDiags/BeamRelevant.cpp
+++ b/Source/Diagnostics/ReducedDiags/BeamRelevant.cpp
@@ -175,15 +175,6 @@ void BeamRelevant::ComputeDiags (int step)
     // inverse of speed of light squared
     Real constexpr inv_c2 = 1.0_rt / (PhysConst::c * PhysConst::c);
 
-    // If 2D-XZ, p.pos(1) is z, rather than p.pos(2).
-#if (defined WARPX_DIM_3D)
-    int const index_z = 2;
-#elif (defined WARPX_DIM_XZ || defined WARPX_DIM_RZ)
-    int const index_z = 1;
-#elif (defined WARPX_DIM_1D_Z)
-    int const index_z = 0;
-#endif
-
     // loop over species
     for (int i_s = 0; i_s < nSpecies; ++i_s)
     {
@@ -212,26 +203,14 @@ void BeamRelevant::ComputeDiags (int step)
                 const ParticleReal p_uy = p.rdata(PIdx::uy);
                 const ParticleReal p_uz = p.rdata(PIdx::uz);
                 const ParticleReal p_us = p_ux*p_ux + p_uy*p_uy + p_uz*p_uz;
-                const ParticleReal p_pos0 = p.pos(0);
                 const ParticleReal p_w = p.rdata(PIdx::w);
 
-#if defined(WARPX_DIM_3D)
-                const ParticleReal p_pos1 = p.pos(1);
-                const ParticleReal p_x_mean = p_pos0*p_w;
-                const ParticleReal p_y_mean = p_pos1*p_w;
-#elif defined(WARPX_DIM_RZ)
-                const ParticleReal p_theta = p.rdata(PIdx::theta);
-                const ParticleReal p_x_mean = p_pos0*std::cos(p_theta)*p_w;
-                const ParticleReal p_y_mean = p_pos0*std::sin(p_theta)*p_w;
-#elif defined(WARPX_DIM_XZ)
-                const ParticleReal p_x_mean = p_pos0*p_w;
-                const ParticleReal p_y_mean = 0;
-#elif defined(WARPX_DIM_1D_Z)
-                amrex::ignore_unused(p_pos0);
-                const ParticleReal p_x_mean = 0;
-                const ParticleReal p_y_mean = 0;
-#endif
-                const ParticleReal p_z_mean = p.pos(index_z)*p_w;
+                ParticleReal p_x, p_y, p_z;
+                get_particle_position(p, p_x, p_y, p_z);
+
+                const ParticleReal p_x_mean = p_x*p_w;
+                const ParticleReal p_y_mean = p_y*p_w;
+                const ParticleReal p_z_mean = p_z*p_w;
 
                 const ParticleReal p_ux_mean = p_ux*p_w;
                 const ParticleReal p_uy_mean = p_uy*p_w;
@@ -292,25 +271,8 @@ void BeamRelevant::ComputeDiags (int step)
                 const ParticleReal p_gm = std::sqrt(1.0_rt+p_us*inv_c2);
                 const ParticleReal p_w = p.rdata(PIdx::w);
 
-#if (defined WARPX_DIM_1D_Z)
-                const ParticleReal p_x = 0.0;
-                const ParticleReal p_y = 0.0;
-#elif (defined WARPX_DIM_RZ)
-                const ParticleReal p_pos0 = p.pos(0);
-                const ParticleReal p_theta = p.rdata(PIdx::theta);
-                const ParticleReal p_x = p_pos0*std::cos(p_theta);
-                const ParticleReal p_y = p_pos0*std::sin(p_theta);
-#elif (defined WARPX_DIM_XZ)
-                const ParticleReal p_pos0 = p.pos(0);
-                const ParticleReal p_x = p_pos0;
-                const ParticleReal p_y = 0.0;
-#else
-                const ParticleReal p_pos0 = p.pos(0);
-                const ParticleReal p_pos1 = p.pos(1);
-                const ParticleReal p_x = p_pos0;
-                const ParticleReal p_y = p_pos1;
-#endif
-                const ParticleReal p_z = p.pos(index_z);
+                ParticleReal p_x, p_y, p_z;
+                get_particle_position(p, p_x, p_y, p_z);
 
                 const ParticleReal p_x_ms = (p_x-x_mean)*(p_x-x_mean)*p_w;
                 const ParticleReal p_y_ms = (p_y-y_mean)*(p_y-y_mean)*p_w;
diff --git a/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp b/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
index f4b4e2a39a1..40ef1a088e6 100644
--- a/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldEnergy.cpp
@@ -98,15 +98,9 @@ void FieldEnergy::ComputeDiags (int step)
         const MultiFab & By = warpx.getField(FieldType::Bfield_aux, lev,1);
         const MultiFab & Bz = warpx.getField(FieldType::Bfield_aux, lev,2);
 
-        // get cell size
-        Geometry const & geom = warpx.Geom(lev);
-#if defined(WARPX_DIM_1D_Z)
-        auto dV = geom.CellSize(0);
-#elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-        auto dV = geom.CellSize(0) * geom.CellSize(1);
-#elif defined(WARPX_DIM_3D)
-        auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
-#endif
+        // get cell volume
+        const std::array<Real, 3> &dx = WarpX::CellSize(lev);
+        const amrex::Real dV = dx[0]*dx[1]*dx[2];
 
 #if defined(WARPX_DIM_RZ)
         amrex::Real const tmpEx = ComputeNorm2RZ(Ex, lev);
@@ -119,6 +113,8 @@ void FieldEnergy::ComputeDiags (int step)
         amrex::Real const tmpBz = ComputeNorm2RZ(Bz, lev);
         amrex::Real const Bs = tmpBx + tmpBy + tmpBz;
 #else
+        Geometry const & geom = warpx.Geom(lev);
+
         // compute E squared
         Real const tmpEx = Ex.norm2(0,geom.periodicity());
         Real const tmpEy = Ey.norm2(0,geom.periodicity());
diff --git a/Source/Diagnostics/ReducedDiags/FieldMomentum.cpp b/Source/Diagnostics/ReducedDiags/FieldMomentum.cpp
index f182acd5ba2..7eb16efecff 100644
--- a/Source/Diagnostics/ReducedDiags/FieldMomentum.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldMomentum.cpp
@@ -183,15 +183,9 @@ void FieldMomentum::ComputeDiags (int step)
         amrex::Real ExB_z = amrex::get<2>(r);
         amrex::ParallelDescriptor::ReduceRealSum({ExB_x,ExB_y,ExB_z});
 
-        // Get cell size
-        amrex::Geometry const & geom = warpx.Geom(lev);
-#if   defined(WARPX_DIM_1D_Z)
-        auto dV = geom.CellSize(0);
-#elif   defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-        auto dV = geom.CellSize(0) * geom.CellSize(1);
-#elif defined(WARPX_DIM_3D)
-        auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
-#endif
+        // Get cell volume
+        const std::array<Real, 3> &dx = WarpX::CellSize(lev);
+        const amrex::Real dV = dx[0]*dx[1]*dx[2];
 
         // Save data (offset: 3 values for each refinement level)
         const int offset = lev*3;
diff --git a/Source/Diagnostics/ReducedDiags/ParticleExtrema.cpp b/Source/Diagnostics/ReducedDiags/ParticleExtrema.cpp
index 0cc5429be7a..9adfd3f238c 100644
--- a/Source/Diagnostics/ReducedDiags/ParticleExtrema.cpp
+++ b/Source/Diagnostics/ReducedDiags/ParticleExtrema.cpp
@@ -51,7 +51,7 @@
 #include <map>
 #include <vector>
 
-using namespace amrex;
+using namespace amrex::literals;
 using namespace warpx::fields;
 
 // constructor
@@ -59,7 +59,7 @@ ParticleExtrema::ParticleExtrema (const std::string& rd_name)
 : ReducedDiags{rd_name}
 {
     // read species name
-    const ParmParse pp_rd_name(rd_name);
+    const amrex::ParmParse pp_rd_name(rd_name);
     pp_rd_name.get("species",m_species_name);
 
     // get WarpX class object
@@ -122,7 +122,7 @@ ParticleExtrema::ParticleExtrema (const std::string& rd_name)
 
         m_data.resize(all_diag_names.size());
 
-        if (ParallelDescriptor::IOProcessor())
+        if (amrex::ParallelDescriptor::IOProcessor())
         {
             if ( m_write_header )
             {
@@ -165,16 +165,7 @@ void ParticleExtrema::ComputeDiags (int step)
     const auto species_names = mypc.GetSpeciesNames();
 
     // inverse of speed of light squared
-    Real constexpr inv_c2 = 1.0_rt / (PhysConst::c * PhysConst::c);
-
-    // If 2D-XZ, p.pos(1) is z, rather than p.pos(2).
-#if (defined WARPX_DIM_3D)
-    int const index_z = 2;
-#elif (defined WARPX_DIM_XZ || defined WARPX_DIM_RZ)
-    int const index_z = 1;
-#elif (defined WARPX_DIM_1D_Z)
-    int const index_z = 0;
-#endif
+    amrex::Real constexpr inv_c2 = 1.0_rt / (PhysConst::c * PhysConst::c);
 
     // loop over species
     for (int i_s = 0; i_s < nSpecies; ++i_s)
@@ -193,172 +184,72 @@ void ParticleExtrema::ComputeDiags (int step)
         }
 
         using PType = typename WarpXParticleContainer::SuperParticleType;
-
-        // xmin
-#if (defined WARPX_DIM_RZ)
-        Real xmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0)*std::cos(p.rdata(PIdx::theta)); });
-#elif (defined WARPX_DIM_1D_Z)
-        Real xmin = 0.0_rt;
-#else
-        Real xmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0); });
-#endif
-
-        // xmax
-#if (defined WARPX_DIM_RZ)
-        Real xmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0)*std::cos(p.rdata(PIdx::theta)); });
-#elif (defined WARPX_DIM_1D_Z)
-        Real xmax = 0.0_rt;
-#else
-        Real xmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0); });
-#endif
-
-        // ymin
-#if (defined WARPX_DIM_RZ)
-        Real ymin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0)*std::sin(p.rdata(PIdx::theta)); });
-#elif (defined WARPX_DIM_XZ || WARPX_DIM_1D_Z)
-        Real ymin = 0.0_rt;
-#else
-        Real ymin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(1); });
-#endif
-
-        // ymax
-#if (defined WARPX_DIM_RZ)
-        Real ymax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(0)*std::sin(p.rdata(PIdx::theta)); });
-#elif (defined WARPX_DIM_XZ || WARPX_DIM_1D_Z)
-        Real ymax = 0.0_rt;
-#else
-        Real ymax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(1); });
-#endif
-
-        // zmin
-        Real zmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(index_z); });
-
-        // zmax
-        Real zmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.pos(index_z); });
-
-        // uxmin
-        Real uxmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::ux); });
-
-        // uxmax
-        Real uxmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::ux); });
-
-        // uymin
-        Real uymin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::uy); });
-
-        // uymax
-        Real uymax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::uy); });
-
-        // uzmin
-        Real uzmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::uz); });
-
-        // uzmax
-        Real uzmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::uz); });
-
-        // gmin
-        Real gmin = 0.0_rt;
-        if ( is_photon ) {
-            gmin = ReduceMin( myspc,
-            [=] AMREX_GPU_HOST_DEVICE (const PType& p)
+        using OpMin = amrex::ReduceOpMin;
+        using OpMax = amrex::ReduceOpMax;
+
+        amrex::ReduceOps<OpMin, OpMin, OpMin, OpMin,
+                         OpMax, OpMax, OpMax, OpMax> reduce_ops;
+        auto posminmax = amrex::ParticleReduce<amrex::ReduceData<amrex::Real, amrex::Real, amrex::Real, amrex::Real,
+                                                                 amrex::Real, amrex::Real, amrex::Real, amrex::Real>>(
+            myspc,
+            [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real, amrex::Real,
+                                                                             amrex::Real, amrex::Real, amrex::Real, amrex::Real>
             {
-                const Real ux = p.rdata(PIdx::ux);
-                const Real uy = p.rdata(PIdx::uy);
-                const Real uz = p.rdata(PIdx::uz);
-                const Real us = ux*ux + uy*uy + uz*uz;
-                return std::sqrt(us*inv_c2);
-            });
-        } else {
-            gmin = ReduceMin( myspc,
-            [=] AMREX_GPU_HOST_DEVICE (const PType& p)
+                amrex::ParticleReal x, y, z;
+                get_particle_position(p, x, y, z);
+                amrex::Real const w = p.rdata(PIdx::w);
+                return {w, x, y, z, w, x, y, z};
+            },
+            reduce_ops);
+
+        amrex::Real wmin = amrex::get<0>(posminmax);
+        amrex::Real xmin = amrex::get<1>(posminmax);
+        amrex::Real ymin = amrex::get<2>(posminmax);
+        amrex::Real zmin = amrex::get<3>(posminmax);
+        amrex::Real wmax = amrex::get<4>(posminmax);
+        amrex::Real xmax = amrex::get<5>(posminmax);
+        amrex::Real ymax = amrex::get<6>(posminmax);
+        amrex::Real zmax = amrex::get<7>(posminmax);
+
+        amrex::Real const gfactor = (is_photon ? 0._rt : 1._rt);
+        auto uminmax = amrex::ParticleReduce<amrex::ReduceData<amrex::Real, amrex::Real, amrex::Real, amrex::Real,
+                                                               amrex::Real, amrex::Real, amrex::Real, amrex::Real>>(
+            myspc,
+            [=] AMREX_GPU_DEVICE(const PType& p) noexcept -> amrex::GpuTuple<amrex::Real, amrex::Real, amrex::Real, amrex::Real,
+                                                                             amrex::Real, amrex::Real, amrex::Real, amrex::Real>
             {
-                const Real ux = p.rdata(PIdx::ux);
-                const Real uy = p.rdata(PIdx::uy);
-                const Real uz = p.rdata(PIdx::uz);
-                const Real us = ux*ux + uy*uy + uz*uz;
-                return std::sqrt(1.0_rt + us*inv_c2);
-            });
-        }
-
-        // gmax
-        Real gmax = 0.0_rt;
-        if ( is_photon ) {
-            gmax = ReduceMax( myspc,
-            [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-            {
-                const Real ux = p.rdata(PIdx::ux);
-                const Real uy = p.rdata(PIdx::uy);
-                const Real uz = p.rdata(PIdx::uz);
-                const Real us = ux*ux + uy*uy + uz*uz;
-                return std::sqrt(us*inv_c2);
-            });
-        } else {
-            gmax = ReduceMax( myspc,
-            [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-            {
-                const Real ux = p.rdata(PIdx::ux);
-                const Real uy = p.rdata(PIdx::uy);
-                const Real uz = p.rdata(PIdx::uz);
-                const Real us = ux*ux + uy*uy + uz*uz;
-                return std::sqrt(1.0_rt + us*inv_c2);
-            });
-        }
-
-        // wmin
-        Real wmin = ReduceMin( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::w); });
-
-        // wmax
-        Real wmax = ReduceMax( myspc,
-        [=] AMREX_GPU_HOST_DEVICE (const PType& p)
-        { return p.rdata(PIdx::w); });
-
-        ParallelDescriptor::ReduceRealMin({xmin,ymin,zmin,uxmin,uymin,uzmin,gmin,wmin});
-        ParallelDescriptor::ReduceRealMax({xmax,ymax,zmax,uxmax,uymax,uzmax,gmax,wmax});
+                amrex::Real const ux = p.rdata(PIdx::ux);
+                amrex::Real const uy = p.rdata(PIdx::uy);
+                amrex::Real const uz = p.rdata(PIdx::uz);
+                amrex::Real const g = std::sqrt(gfactor + (ux*ux + uy*uy + uz*uz)*inv_c2);
+                return {g, ux, uy, uz, g, ux, uy, uz};
+            },
+            reduce_ops);
+
+        amrex::Real gmin = amrex::get<0>(uminmax);
+        amrex::Real uxmin = amrex::get<1>(uminmax);
+        amrex::Real uymin = amrex::get<2>(uminmax);
+        amrex::Real uzmin = amrex::get<3>(uminmax);
+        amrex::Real gmax = amrex::get<4>(uminmax);
+        amrex::Real uxmax = amrex::get<5>(uminmax);
+        amrex::Real uymax = amrex::get<6>(uminmax);
+        amrex::Real uzmax = amrex::get<7>(uminmax);
+
+        amrex::ParallelDescriptor::ReduceRealMin({xmin,ymin,zmin,uxmin,uymin,uzmin,gmin,wmin});
+        amrex::ParallelDescriptor::ReduceRealMax({xmax,ymax,zmax,uxmax,uymax,uzmax,gmax,wmax});
 
 #if (defined WARPX_QED)
         // get number of level (int)
         const auto level_number = WarpX::GetInstance().finestLevel();
 
         // compute chimin and chimax
-        Real chimin_f = 0.0_rt;
-        Real chimax_f = 0.0_rt;
+        amrex::Real chimin_f = 0.0_rt;
+        amrex::Real chimax_f = 0.0_rt;
 
         if (myspc.DoQED())
         {
             // declare chi arrays
-            std::vector<Real> chimin, chimax;
+            std::vector<amrex::Real> chimin, chimax;
             chimin.resize(level_number+1,0.0_rt);
             chimax.resize(level_number+1,0.0_rt);
 
@@ -374,17 +265,17 @@ void ParticleExtrema::ComputeDiags (int step)
             {
                 // define variables in preparation for field gathering
                 const std::array<amrex::Real,3>& dx = WarpX::CellSize(std::max(lev, 0));
-                const GpuArray<amrex::Real, 3> dx_arr = {dx[0], dx[1], dx[2]};
-                const MultiFab & Ex = warpx.getField(FieldType::Efield_aux, lev,0);
-                const MultiFab & Ey = warpx.getField(FieldType::Efield_aux, lev,1);
-                const MultiFab & Ez = warpx.getField(FieldType::Efield_aux, lev,2);
-                const MultiFab & Bx = warpx.getField(FieldType::Bfield_aux, lev,0);
-                const MultiFab & By = warpx.getField(FieldType::Bfield_aux, lev,1);
-                const MultiFab & Bz = warpx.getField(FieldType::Bfield_aux, lev,2);
+                const amrex::GpuArray<amrex::Real, 3> dx_arr = {dx[0], dx[1], dx[2]};
+                const amrex::MultiFab & Ex = warpx.getField(FieldType::Efield_aux, lev,0);
+                const amrex::MultiFab & Ey = warpx.getField(FieldType::Efield_aux, lev,1);
+                const amrex::MultiFab & Ez = warpx.getField(FieldType::Efield_aux, lev,2);
+                const amrex::MultiFab & Bx = warpx.getField(FieldType::Bfield_aux, lev,0);
+                const amrex::MultiFab & By = warpx.getField(FieldType::Bfield_aux, lev,1);
+                const amrex::MultiFab & Bz = warpx.getField(FieldType::Bfield_aux, lev,2);
 
                 // declare reduce_op
-                ReduceOps<ReduceOpMin, ReduceOpMax> reduce_op;
-                ReduceData<Real, Real> reduce_data(reduce_op);
+                amrex::ReduceOps<amrex::ReduceOpMin, amrex::ReduceOpMax> reduce_op;
+                amrex::ReduceData<amrex::Real, amrex::Real> reduce_data(reduce_op);
                 using ReduceTuple = typename decltype(reduce_data)::Type;
 
                 // Loop over boxes
@@ -408,28 +299,28 @@ void ParticleExtrema::ComputeDiags (int step)
                     // define variables in preparation for field gathering
                     amrex::Box box = pti.tilebox();
                     box.grow(ngEB);
-                    const Dim3 lo = amrex::lbound(box);
+                    const amrex::Dim3 lo = amrex::lbound(box);
                     const std::array<amrex::Real, 3>& xyzmin = WarpX::LowerCorner(box, lev, 0._rt);
-                    const GpuArray<amrex::Real, 3> xyzmin_arr = {xyzmin[0], xyzmin[1], xyzmin[2]};
+                    const amrex::GpuArray<amrex::Real, 3> xyzmin_arr = {xyzmin[0], xyzmin[1], xyzmin[2]};
                     const auto& ex_arr = Ex[pti].array();
                     const auto& ey_arr = Ey[pti].array();
                     const auto& ez_arr = Ez[pti].array();
                     const auto& bx_arr = Bx[pti].array();
                     const auto& by_arr = By[pti].array();
                     const auto& bz_arr = Bz[pti].array();
-                    const IndexType ex_type = Ex[pti].box().ixType();
-                    const IndexType ey_type = Ey[pti].box().ixType();
-                    const IndexType ez_type = Ez[pti].box().ixType();
-                    const IndexType bx_type = Bx[pti].box().ixType();
-                    const IndexType by_type = By[pti].box().ixType();
-                    const IndexType bz_type = Bz[pti].box().ixType();
+                    const amrex::IndexType ex_type = Ex[pti].box().ixType();
+                    const amrex::IndexType ey_type = Ey[pti].box().ixType();
+                    const amrex::IndexType ez_type = Ez[pti].box().ixType();
+                    const amrex::IndexType bx_type = Bx[pti].box().ixType();
+                    const amrex::IndexType by_type = By[pti].box().ixType();
+                    const amrex::IndexType bz_type = Bz[pti].box().ixType();
 
                     // evaluate reduce_op
                     reduce_op.eval(pti.numParticles(), reduce_data,
                     [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple
                     {
                         // get external fields
-                        ParticleReal xp, yp, zp;
+                        amrex::ParticleReal xp, yp, zp;
                         GetPosition(i, xp, yp, zp);
                         amrex::ParticleReal ex = Ex_external_particle;
                         amrex::ParticleReal ey = Ey_external_particle;
@@ -449,7 +340,7 @@ void ParticleExtrema::ComputeDiags (int step)
                             dx_arr, xyzmin_arr, lo,
                             n_rz_azimuthal_modes, nox, galerkin_interpolation);
                         // compute chi
-                        Real chi = 0.0_rt;
+                        amrex::Real chi = 0.0_rt;
                         if ( is_photon ) {
                             chi = QedUtils::chi_photon(ux[i]*m, uy[i]*m, uz[i]*m,
                                              ex, ey, ez, bx, by, bz);
@@ -461,13 +352,13 @@ void ParticleExtrema::ComputeDiags (int step)
                     });
                 }
                 auto val = reduce_data.value();
-                chimin[lev] = get<0>(val);
-                chimax[lev] = get<1>(val);
+                chimin[lev] = amrex::get<0>(val);
+                chimax[lev] = amrex::get<1>(val);
             }
             chimin_f = *std::min_element(chimin.begin(), chimin.end());
             chimax_f = *std::max_element(chimax.begin(), chimax.end());
-            ParallelDescriptor::ReduceRealMin(chimin_f, ParallelDescriptor::IOProcessorNumber());
-            ParallelDescriptor::ReduceRealMax(chimax_f, ParallelDescriptor::IOProcessorNumber());
+            amrex::ParallelDescriptor::ReduceRealMin(chimin_f, amrex::ParallelDescriptor::IOProcessorNumber());
+            amrex::ParallelDescriptor::ReduceRealMax(chimax_f, amrex::ParallelDescriptor::IOProcessorNumber());
         }
 #endif
 
diff --git a/Source/Diagnostics/WarpXOpenPMD.cpp b/Source/Diagnostics/WarpXOpenPMD.cpp
index 45d5dc1cdee..36827bd316a 100644
--- a/Source/Diagnostics/WarpXOpenPMD.cpp
+++ b/Source/Diagnostics/WarpXOpenPMD.cpp
@@ -911,33 +911,34 @@ WarpXOpenPMDPlot::SaveRealProperty (ParticleIter& pti,
     {
         auto const real_counter = std::min(write_real_comp.size(), real_comp_names.size());
 
+#if defined(WARPX_DIM_RZ)
         // reconstruct Cartesian positions for RZ simulations
         // r,z,theta -> x,y,z
-#if defined(WARPX_DIM_RZ)
-        auto const * const r = soa.GetRealData(PIdx::x).data();
-        auto const * const theta = soa.GetRealData(PIdx::theta).data();
+        // For each component that is being written, allocate a temporary array; otherwise allocate an empty one.
+        std::shared_ptr<amrex::ParticleReal> const x(
+            new amrex::ParticleReal[(write_real_comp[0] ? numParticleOnTile : 0)],
+            [](amrex::ParticleReal const *p) { delete[] p; }
+        );
+        std::shared_ptr<amrex::ParticleReal> const y(
+            new amrex::ParticleReal[(write_real_comp[1] ? numParticleOnTile : 0)],
+            [](amrex::ParticleReal const *p) { delete[] p; }
+        );
 
+        const auto& tile = pti.GetParticleTile();
+        const auto& ptd = tile.getConstParticleTileData();
+
+        for (int i = 0; i < numParticleOnTile; ++i) {
+            const auto& p = ptd.getSuperParticle(i);
+            amrex::ParticleReal xp, yp, zp;
+            get_particle_position(p, xp, yp, zp);
+            if (write_real_comp[0]) { x.get()[i] = xp; }
+            if (write_real_comp[1]) { y.get()[i] = yp; }
+        }
         if (write_real_comp[0]) {
-            std::shared_ptr<amrex::ParticleReal> const x(
-                new amrex::ParticleReal[numParticleOnTile],
-                [](amrex::ParticleReal const *p) { delete[] p; }
-            );
-            for (int i = 0; i < numParticleOnTile; ++i) {
-                x.get()[i] = r[i] * std::cos(theta[i]);
-            }
-            getComponentRecord(real_comp_names[0]).storeChunk(
-                x, {offset}, {numParticleOnTile64});
+            getComponentRecord(real_comp_names[0]).storeChunk(x, {offset}, {numParticleOnTile64});
         }
         if (write_real_comp[1]) {
-            std::shared_ptr<amrex::ParticleReal> const y(
-                new amrex::ParticleReal[numParticleOnTile],
-                [](amrex::ParticleReal const *p) { delete[] p; }
-            );
-            for (int i = 0; i < numParticleOnTile; ++i) {
-                y.get()[i] = r[i] * std::sin(theta[i]);
-            }
-            getComponentRecord(real_comp_names[1]).storeChunk(
-                y, {offset}, {numParticleOnTile64});
+            getComponentRecord(real_comp_names[1]).storeChunk(y, {offset}, {numParticleOnTile64});
         }
 #endif
 

From 7269f09307ce608428bdfc406544c0df7284ab40 Mon Sep 17 00:00:00 2001
From: Luca Fedeli <luca.fedeli@cea.fr>
Date: Tue, 11 Jun 2024 01:03:29 +0200
Subject: [PATCH 05/11] slightly increase tolerance for embedded_circle test
 (#4968)

---
 Examples/Tests/embedded_circle/inputs_2d | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Examples/Tests/embedded_circle/inputs_2d b/Examples/Tests/embedded_circle/inputs_2d
index 7100f245917..7c76c1659da 100644
--- a/Examples/Tests/embedded_circle/inputs_2d
+++ b/Examples/Tests/embedded_circle/inputs_2d
@@ -5,7 +5,7 @@
 max_step = 11
 warpx.const_dt = 3.99e-13
 warpx.do_electrostatic = labframe
-warpx.self_fields_required_precision = 1e-06
+warpx.self_fields_required_precision = 2e-06
 warpx.eb_implicit_function = -((x-0.00005)**2+(z-0.00005)**2-1e-05**2)
 warpx.eb_potential(x,y,z,t) = -10
 warpx.self_fields_absolute_tolerance = 0.02

From a1eb908e897c5fb8cdad40fa0d041a1508c19d50 Mon Sep 17 00:00:00 2001
From: David Grote <grote1@llnl.gov>
Date: Mon, 10 Jun 2024 17:05:06 -0700
Subject: [PATCH 06/11] Remove unneeded macros from AllocLevelData (#4979)

---
 Source/Parallelization/GuardCellManager.H   | 2 +-
 Source/Parallelization/GuardCellManager.cpp | 2 +-
 Source/WarpX.cpp                            | 8 +-------
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/Source/Parallelization/GuardCellManager.H b/Source/Parallelization/GuardCellManager.H
index 341db01bef6..561456943f1 100644
--- a/Source/Parallelization/GuardCellManager.H
+++ b/Source/Parallelization/GuardCellManager.H
@@ -52,7 +52,7 @@ public:
      */
     void Init(
         amrex::Real dt,
-        amrex::RealVect dx,
+        const amrex::Real * dx,
         bool do_subcycling,
         bool do_fdtd_nci_corr,
         short grid_type,
diff --git a/Source/Parallelization/GuardCellManager.cpp b/Source/Parallelization/GuardCellManager.cpp
index 28157e09d8c..321be15df7e 100644
--- a/Source/Parallelization/GuardCellManager.cpp
+++ b/Source/Parallelization/GuardCellManager.cpp
@@ -33,7 +33,7 @@ using namespace amrex;
 void
 guardCellManager::Init (
     const amrex::Real dt,
-    const amrex::RealVect dx,
+    const amrex::Real *dx,
     const bool do_subcycling,
     const bool do_fdtd_nci_corr,
     const short grid_type,
diff --git a/Source/WarpX.cpp b/Source/WarpX.cpp
index d3f91002ef4..ef81aef4482 100644
--- a/Source/WarpX.cpp
+++ b/Source/WarpX.cpp
@@ -2120,13 +2120,7 @@ WarpX::AllocLevelData (int lev, const BoxArray& ba, const DistributionMapping& d
 {
     const bool aux_is_nodal = (field_gathering_algo == GatheringAlgo::MomentumConserving);
 
-#if   defined(WARPX_DIM_1D_Z)
-    const amrex::RealVect dx(WarpX::CellSize(lev)[2]);
-#elif   defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
-    const amrex::RealVect dx = {WarpX::CellSize(lev)[0], WarpX::CellSize(lev)[2]};
-#elif defined(WARPX_DIM_3D)
-    const amrex::RealVect dx = {WarpX::CellSize(lev)[0], WarpX::CellSize(lev)[1], WarpX::CellSize(lev)[2]};
-#endif
+    const Real* dx = Geom(lev).CellSize();
 
     // Initialize filter before guard cells manager
     // (needs info on length of filter's stencil)

From a3cd47d19b16722721bb206ebdb4da52c4dca8af Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Tue, 11 Jun 2024 06:24:39 +0200
Subject: [PATCH 07/11] Release 24.06 (#4980)

* AMReX: 24.06

* pyAMReX: 24.06

* WarpX: 24.06
---
 .github/workflows/cuda.yml       | 2 +-
 CMakeLists.txt                   | 2 +-
 Docs/source/conf.py              | 4 ++--
 Python/setup.py                  | 2 +-
 Regression/WarpX-GPU-tests.ini   | 2 +-
 Regression/WarpX-tests.ini       | 2 +-
 cmake/dependencies/AMReX.cmake   | 4 ++--
 cmake/dependencies/pyAMReX.cmake | 4 ++--
 run_test.sh                      | 2 +-
 setup.py                         | 2 +-
 10 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml
index 07422c36e15..3a03ea01c0c 100644
--- a/.github/workflows/cuda.yml
+++ b/.github/workflows/cuda.yml
@@ -115,7 +115,7 @@ jobs:
         which nvcc || echo "nvcc not in PATH!"
 
         git clone https://github.com/AMReX-Codes/amrex.git ../amrex
-        cd ../amrex && git checkout --detach 28b010126a1b39297d8a496ba81f171d8563953b && cd -
+        cd ../amrex && git checkout --detach 24.06 && cd -
         make COMP=gcc QED=FALSE USE_MPI=TRUE USE_GPU=TRUE USE_OMP=FALSE USE_FFT=TRUE USE_CCACHE=TRUE -j 4
 
         ccache -s
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85b4a42b040..81c12a3df2f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 # Preamble ####################################################################
 #
 cmake_minimum_required(VERSION 3.20.0)
-project(WarpX VERSION 24.05)
+project(WarpX VERSION 24.06)
 
 include(${WarpX_SOURCE_DIR}/cmake/WarpXFunctions.cmake)
 
diff --git a/Docs/source/conf.py b/Docs/source/conf.py
index 471f6f2b6a3..18ca8370e80 100644
--- a/Docs/source/conf.py
+++ b/Docs/source/conf.py
@@ -103,9 +103,9 @@ def __init__(self, *args, **kwargs):
 # built documents.
 #
 # The short X.Y version.
-version = u'24.05'
+version = u'24.06'
 # The full version, including alpha/beta/rc tags.
-release = u'24.05'
+release = u'24.06'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/Python/setup.py b/Python/setup.py
index fbf3330ada0..9f9c8d7d736 100644
--- a/Python/setup.py
+++ b/Python/setup.py
@@ -54,7 +54,7 @@
     package_data = {}
 
 setup(name = 'pywarpx',
-      version = '24.05',
+      version = '24.06',
       packages = ['pywarpx'],
       package_dir = {'pywarpx': 'pywarpx'},
       description = """Wrapper of WarpX""",
diff --git a/Regression/WarpX-GPU-tests.ini b/Regression/WarpX-GPU-tests.ini
index 8542e5f35d9..05ed74fe1b1 100644
--- a/Regression/WarpX-GPU-tests.ini
+++ b/Regression/WarpX-GPU-tests.ini
@@ -60,7 +60,7 @@ emailBody = Check https://ccse.lbl.gov/pub/GpuRegressionTesting/WarpX/ for more
 
 [AMReX]
 dir = /home/regtester/git/amrex/
-branch = 28b010126a1b39297d8a496ba81f171d8563953b
+branch = 24.06
 
 [source]
 dir = /home/regtester/git/WarpX
diff --git a/Regression/WarpX-tests.ini b/Regression/WarpX-tests.ini
index e2d69be9c60..41db9a15bdc 100644
--- a/Regression/WarpX-tests.ini
+++ b/Regression/WarpX-tests.ini
@@ -59,7 +59,7 @@ emailBody = Check https://ccse.lbl.gov/pub/RegressionTesting/WarpX/ for more det
 
 [AMReX]
 dir = /home/regtester/AMReX_RegTesting/amrex/
-branch = 28b010126a1b39297d8a496ba81f171d8563953b
+branch = 24.06
 
 [source]
 dir = /home/regtester/AMReX_RegTesting/warpx
diff --git a/cmake/dependencies/AMReX.cmake b/cmake/dependencies/AMReX.cmake
index f80ec0d5af7..eda45dc9f77 100644
--- a/cmake/dependencies/AMReX.cmake
+++ b/cmake/dependencies/AMReX.cmake
@@ -250,7 +250,7 @@ macro(find_amrex)
         endif()
         set(COMPONENT_PRECISION ${WarpX_PRECISION} P${WarpX_PARTICLE_PRECISION})
 
-        find_package(AMReX 24.05 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS)
+        find_package(AMReX 24.06 CONFIG REQUIRED COMPONENTS ${COMPONENT_ASCENT} ${COMPONENT_DIMS} ${COMPONENT_EB} PARTICLES ${COMPONENT_PIC} ${COMPONENT_PRECISION} ${COMPONENT_SENSEI} LSOLVERS)
         # note: TINYP skipped because user-configured and optional
 
         # AMReX CMake helper scripts
@@ -273,7 +273,7 @@ set(WarpX_amrex_src ""
 set(WarpX_amrex_repo "https://github.com/AMReX-Codes/amrex.git"
     CACHE STRING
     "Repository URI to pull and build AMReX from if(WarpX_amrex_internal)")
-set(WarpX_amrex_branch "28b010126a1b39297d8a496ba81f171d8563953b"
+set(WarpX_amrex_branch "24.06"
     CACHE STRING
     "Repository branch for WarpX_amrex_repo if(WarpX_amrex_internal)")
 
diff --git a/cmake/dependencies/pyAMReX.cmake b/cmake/dependencies/pyAMReX.cmake
index cdef8f277f6..18cab89e347 100644
--- a/cmake/dependencies/pyAMReX.cmake
+++ b/cmake/dependencies/pyAMReX.cmake
@@ -64,7 +64,7 @@ function(find_pyamrex)
         endif()
     elseif(NOT WarpX_pyamrex_internal)
         # TODO: MPI control
-        find_package(pyAMReX 24.05 CONFIG REQUIRED)
+        find_package(pyAMReX 24.06 CONFIG REQUIRED)
         message(STATUS "pyAMReX: Found version '${pyAMReX_VERSION}'")
     endif()
 endfunction()
@@ -79,7 +79,7 @@ option(WarpX_pyamrex_internal "Download & build pyAMReX" ON)
 set(WarpX_pyamrex_repo "https://github.com/AMReX-Codes/pyamrex.git"
     CACHE STRING
     "Repository URI to pull and build pyamrex from if(WarpX_pyamrex_internal)")
-set(WarpX_pyamrex_branch "d4d409bd21bc4c48487883ac2331efdb1a6b3d61"
+set(WarpX_pyamrex_branch "24.06"
     CACHE STRING
     "Repository branch for WarpX_pyamrex_repo if(WarpX_pyamrex_internal)")
 
diff --git a/run_test.sh b/run_test.sh
index e9dbb0f2533..f397d31048e 100755
--- a/run_test.sh
+++ b/run_test.sh
@@ -68,7 +68,7 @@ python3 -m pip install --upgrade -r warpx/Regression/requirements.txt
 
 # Clone AMReX and warpx-data
 git clone https://github.com/AMReX-Codes/amrex.git
-cd amrex && git checkout --detach 28b010126a1b39297d8a496ba81f171d8563953b && cd -
+cd amrex && git checkout --detach 24.06 && cd -
 # warpx-data contains various required data sets
 git clone --depth 1 https://github.com/ECP-WarpX/warpx-data.git
 # openPMD-example-datasets contains various required data sets
diff --git a/setup.py b/setup.py
index 3d61cc5be36..670e376986f 100644
--- a/setup.py
+++ b/setup.py
@@ -278,7 +278,7 @@ def build_extension(self, ext):
 setup(
     name='pywarpx',
     # note PEP-440 syntax: x.y.zaN but x.y.z.devN
-    version = '24.05',
+    version = '24.06',
     packages = ['pywarpx'],
     package_dir = {'pywarpx': 'Python/pywarpx'},
     author='Jean-Luc Vay, David P. Grote, Maxence Thévenet, Rémi Lehe, Andrew Myers, Weiqun Zhang, Axel Huebl, et al.',

From c7b203e5670f57e6b9955db8c3f80d718f070b1c Mon Sep 17 00:00:00 2001
From: Luca Fedeli <luca.fedeli@cea.fr>
Date: Tue, 11 Jun 2024 23:27:08 +0200
Subject: [PATCH 08/11] group initialization and cleanup of external libraries
 (#4964)

---
 Source/Evolve/WarpXEvolve.cpp        |  3 +++
 Source/Initialization/CMakeLists.txt |  1 +
 Source/Initialization/Make.package   |  1 +
 Source/Initialization/WarpXInit.H    | 30 ++++++++++++++++++++++++++
 Source/Initialization/WarpXInit.cpp  | 29 +++++++++++++++++++++++++
 Source/main.cpp                      | 32 ++++++----------------------
 6 files changed, 70 insertions(+), 26 deletions(-)
 create mode 100644 Source/Initialization/WarpXInit.H
 create mode 100644 Source/Initialization/WarpXInit.cpp

diff --git a/Source/Evolve/WarpXEvolve.cpp b/Source/Evolve/WarpXEvolve.cpp
index 4ac50483ad3..8b2cc2c33f1 100644
--- a/Source/Evolve/WarpXEvolve.cpp
+++ b/Source/Evolve/WarpXEvolve.cpp
@@ -306,6 +306,9 @@ WarpX::Evolve (int numsteps)
         multi_diags->FilterComputePackFlushLastTimestep( istep[0] );
         if (m_exit_loop_due_to_interrupt_signal) { ExecutePythonCallback("onbreaksignal"); }
     }
+
+    amrex::Print() <<
+        ablastr::warn_manager::GetWMInstance().PrintGlobalWarnings("THE END");
 }
 
 /* /brief Perform one PIC iteration, without subcycling
diff --git a/Source/Initialization/CMakeLists.txt b/Source/Initialization/CMakeLists.txt
index 8931de740ad..e5e2334fd7e 100644
--- a/Source/Initialization/CMakeLists.txt
+++ b/Source/Initialization/CMakeLists.txt
@@ -11,6 +11,7 @@ foreach(D IN LISTS WarpX_DIMS)
         TemperatureProperties.cpp
         VelocityProperties.cpp
         WarpXAMReXInit.cpp
+        WarpXInit.cpp
         WarpXInitData.cpp
     )
 endforeach()
diff --git a/Source/Initialization/Make.package b/Source/Initialization/Make.package
index 8b4a4c1d669..831e3fc3f89 100644
--- a/Source/Initialization/Make.package
+++ b/Source/Initialization/Make.package
@@ -7,6 +7,7 @@ CEXE_sources += PlasmaInjector.cpp
 CEXE_sources += TemperatureProperties.cpp
 CEXE_sources += VelocityProperties.cpp
 CEXE_sources += WarpXAMReXInit.cpp
+CEXE_sources += WarpXInit.cpp
 CEXE_sources += WarpXInitData.cpp
 
 VPATH_LOCATIONS   += $(WARPX_HOME)/Source/Initialization
diff --git a/Source/Initialization/WarpXInit.H b/Source/Initialization/WarpXInit.H
new file mode 100644
index 00000000000..ce179e2e997
--- /dev/null
+++ b/Source/Initialization/WarpXInit.H
@@ -0,0 +1,30 @@
+/* Copyright 2024 Luca Fedeli
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+#ifndef WARPX_INIT_H_
+#define WARPX_INIT_H_
+
+namespace warpx::initialization
+{
+    /** Initializes, in the following order:
+     * - the MPI library through the mpi_init helper function in ablastr
+     * - the AMReX library
+     * - the FFT library through the anyfft::setup() function in ablastr
+     *
+     * @param[in] argc number of arguments from main()
+     * @param[in] argv argument strings from main()
+     */
+    void initialize_external_libraries(int argc, char* argv[]);
+
+    /** Finalizes, in the following order:
+     * - the FFT library through the anyfft::cleanup() function in ablastr
+     * - the AMReX library
+     * - the MPI library through the mpi_finalize helper function in ablastr
+     */
+    void finalize_external_libraries();
+}
+
+#endif //WARPX_INIT_H_
diff --git a/Source/Initialization/WarpXInit.cpp b/Source/Initialization/WarpXInit.cpp
new file mode 100644
index 00000000000..7e00760bf30
--- /dev/null
+++ b/Source/Initialization/WarpXInit.cpp
@@ -0,0 +1,29 @@
+/* Copyright 2024 Luca Fedeli
+ *
+ * This file is part of WarpX.
+ *
+ * License: BSD-3-Clause-LBNL
+ */
+
+#include "WarpXInit.H"
+
+#include "Initialization/WarpXAMReXInit.H"
+
+#include <AMReX.H>
+
+#include <ablastr/math/fft/AnyFFT.H>
+#include <ablastr/parallelization/MPIInitHelpers.H>
+
+void warpx::initialization::initialize_external_libraries(int argc, char* argv[])
+{
+    ablastr::parallelization::mpi_init(argc, argv);
+    warpx::initialization::amrex_init(argc, argv);
+    ablastr::math::anyfft::setup();
+}
+
+void warpx::initialization::finalize_external_libraries()
+{
+    ablastr::math::anyfft::cleanup();
+    amrex::Finalize();
+    ablastr::parallelization::mpi_finalize();
+}
diff --git a/Source/main.cpp b/Source/main.cpp
index 2a1b828c64f..9273cd3928b 100644
--- a/Source/main.cpp
+++ b/Source/main.cpp
@@ -8,25 +8,16 @@
  */
 #include "WarpX.H"
 
-#include "Initialization/WarpXAMReXInit.H"
+#include "Initialization/WarpXInit.H"
 #include "Utils/WarpXProfilerWrapper.H"
 
-#include <ablastr/math/fft/AnyFFT.H>
-#include <ablastr/parallelization/MPIInitHelpers.H>
 #include <ablastr/utils/timer/Timer.H>
-#include <ablastr/warn_manager/WarnManager.H>
 
 #include <AMReX_Print.H>
 
-
 int main(int argc, char* argv[])
 {
-    ablastr::parallelization::mpi_init(argc, argv);
-
-    warpx::initialization::amrex_init(argc, argv);
-
-    ablastr::math::anyfft::setup();
-
+    warpx::initialization::initialize_external_libraries(argc, argv);
     {
         WARPX_PROFILE_VAR("main()", pmain);
 
@@ -34,29 +25,18 @@ int main(int argc, char* argv[])
         timer.record_start_time();
 
         auto& warpx = WarpX::GetInstance();
-
         warpx.InitData();
-
         warpx.Evolve();
-
-        amrex::Print() <<
-            ablastr::warn_manager::GetWMInstance().PrintGlobalWarnings("THE END");
+        const auto is_warpx_verbose = warpx.Verbose();
+        WarpX::Finalize();
 
         timer.record_stop_time();
-        if (warpx.Verbose())
-        {
+        if (is_warpx_verbose){
             amrex::Print() << "Total Time                     : "
                            << timer.get_global_duration() << '\n';
         }
 
         WARPX_PROFILE_VAR_STOP(pmain);
-
-        WarpX::Finalize();
     }
-
-    ablastr::math::anyfft::cleanup();
-
-    amrex::Finalize();
-
-    ablastr::parallelization::mpi_finalize ();
+    warpx::initialization::finalize_external_libraries();
 }

From 2602b540efcd15f3ffebc3017312e98f46cdc7af Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Wed, 12 Jun 2024 00:20:20 +0200
Subject: [PATCH 09/11] Fix FieldProbe Check: Particle Shape (#4983)

The constructor of FieldProbe might be called earlier than the
WarpX class parameter init. That could lead to relying on an
uninitialized particle shape static. Use the parser instead, similar
to our general efforts to reduce the static members stored in the
WarpX class.
---
 Source/Diagnostics/ReducedDiags/FieldProbe.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
index 7364d5989f1..1fc3b957ec9 100644
--- a/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
+++ b/Source/Diagnostics/ReducedDiags/FieldProbe.cpp
@@ -154,7 +154,12 @@ FieldProbe::FieldProbe (const std::string& rd_name)
             ablastr::warn_manager::WarnPriority::low);
     }
 
-    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(interp_order <= WarpX::nox ,
+    // ensure assumption holds: we read the fields in the interpolation kernel as they are,
+    // without further communication of guard/ghost/halo regions
+    int particle_shape;
+    const ParmParse pp_algo("algo");
+    utils::parser::getWithParser(pp_algo, "particle_shape", particle_shape);
+    WARPX_ALWAYS_ASSERT_WITH_MESSAGE(interp_order <= particle_shape ,
                                      "Field probe interp_order should be less than or equal to algo.particle_shape");
     if (ParallelDescriptor::IOProcessor())
     {

From 3c4e523e255dfa20663e45b416f828924366f2a1 Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Thu, 13 Jun 2024 23:16:14 +0200
Subject: [PATCH 10/11] CMake: heFFTe Support (#4986)

* CMake: heFFTe

* Doc: Perlmutter heFFTe

* Update Spack Dev Envs

* Finalize After Testing

Co-authored-by: Alfred Mishi <140518333+Haavaan@users.noreply.github.com>

---------

Co-authored-by: Alfred Mishi <140518333+Haavaan@users.noreply.github.com>
---
 CMakeLists.txt                                | 37 ++++++++++++++++++
 Docs/source/install/cmake.rst                 |  2 +
 Docs/source/install/dependencies.rst          |  7 ++--
 Docs/source/install/hpc/perlmutter.rst        |  8 ++--
 .../machines/desktop/spack-macos-openmp.yaml  |  1 +
 Tools/machines/desktop/spack-ubuntu-cuda.yaml |  1 +
 .../machines/desktop/spack-ubuntu-openmp.yaml |  1 +
 Tools/machines/desktop/spack-ubuntu-rocm.yaml |  1 +
 .../install_cpu_dependencies.sh               | 39 +++++++++++++++++++
 .../install_gpu_dependencies.sh               | 38 ++++++++++++++++++
 .../perlmutter_cpu_warpx.profile.example      |  2 +
 .../perlmutter_gpu_warpx.profile.example      |  2 +
 cmake/WarpXFunctions.cmake                    |  1 +
 setup.py                                      |  2 +
 14 files changed, 135 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 81c12a3df2f..5e2d1ebba9c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -79,6 +79,7 @@ option(WarpX_LIB           "Build WarpX as a library"                   OFF)
 option(WarpX_MPI           "Multi-node support (message-passing)"       ON)
 option(WarpX_OPENPMD       "openPMD I/O (HDF5, ADIOS)"                  ON)
 option(WarpX_FFT           "FFT-based solvers"                          OFF)
+option(WarpX_HEFFTE        "Multi-node FFT-based solvers"               OFF)
 option(WarpX_PYTHON        "Python bindings"                            OFF)
 option(WarpX_SENSEI        "SENSEI in situ diagnostics"                 OFF)
 option(WarpX_QED           "QED support (requires PICSAR)"              ON)
@@ -136,6 +137,10 @@ mark_as_advanced(WarpX_MPI_THREAD_MULTIPLE)
 
 option(WarpX_amrex_internal                    "Download & build AMReX" ON)
 
+if(WarpX_HEFFTE AND NOT WarpX_MPI)
+    message(FATAL_ERROR "WarpX_HEFFTE (${WarpX_HEFFTE}) can only be used if WarpX_MPI is ON.")
+endif()
+
 # change the default build type to Release (or RelWithDebInfo) instead of Debug
 set_default_build_type("Release")
 
@@ -174,6 +179,10 @@ option(ABLASTR_FFT "compile AnyFFT wrappers" ${WarpX_FFT})
 if(WarpX_FFT)
     set(ABLASTR_FFT ON CACHE STRING "FFT-based solvers" FORCE)
 endif()
+option(ABLASTR_HEFFTE "compile AnyFFT wrappers" ${WarpX_HEFFTE})
+if(WarpX_HEFFTE)
+    set(ABLASTR_HEFFTE ON CACHE STRING "Multi-Node FFT-based solvers" FORCE)
+endif()
 
 # this defined the variable BUILD_TESTING which is ON by default
 #include(CTest)
@@ -215,6 +224,23 @@ if(WarpX_FFT)
     endif()
 endif()
 
+# multi-node FFT
+if(WarpX_HEFFTE)
+    if(WarpX_COMPUTE STREQUAL CUDA)
+        set(_heFFTe_COMPS CUDA)
+    elseif(WarpX_COMPUTE STREQUAL HIP)
+        set(_heFFTe_COMPS ROCM)
+    elseif(WarpX_COMPUTE STREQUAL SYCL)
+        set(_heFFTe_COMPS ONEAPI)
+    else()  # NOACC, OMP
+        set(_heFFTe_COMPS FFTW)  # or MKL
+    endif()
+    # note: we could also enforce GPUAWARE for CUDA and HIP, which can still be
+    #       disabled at runtime
+
+    find_package(Heffte REQUIRED COMPONENTS ${_heFFTe_COMPS})
+endif()
+
 # Python
 if(WarpX_PYTHON)
     find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
@@ -455,6 +481,10 @@ foreach(D IN LISTS WarpX_DIMS)
         endif()
     endif()
 
+    if(ABLASTR_HEFFTE)
+        target_link_libraries(ablastr_${SD} PUBLIC Heffte::Heffte)
+    endif()
+
     if(WarpX_PYTHON)
         target_link_libraries(pyWarpX_${SD} PRIVATE pybind11::module pybind11::windows_extras)
         if(WarpX_PYTHON_IPO)
@@ -539,6 +569,13 @@ foreach(D IN LISTS WarpX_DIMS)
         target_compile_definitions(ablastr_${SD} PUBLIC ABLASTR_USE_FFT)
     endif()
 
+    if(WarpX_HEFFTE)
+        target_compile_definitions(ablastr_${SD} PUBLIC WARPX_USE_HEFFTE)
+    endif()
+    if(ABLASTR_HEFFTE)
+        target_compile_definitions(ablastr_${SD} PUBLIC ABLASTR_USE_HEFFTE)
+    endif()
+
     if(WarpX_PYTHON AND pyWarpX_VERSION_INFO)
         # for module __version__
         target_compile_definitions(pyWarpX_${SD} PRIVATE
diff --git a/Docs/source/install/cmake.rst b/Docs/source/install/cmake.rst
index b9fd0b2be45..fde927c10e1 100644
--- a/Docs/source/install/cmake.rst
+++ b/Docs/source/install/cmake.rst
@@ -97,6 +97,7 @@ CMake Option                  Default & Values                             Descr
 ``WarpX_PRECISION``           SINGLE/**DOUBLE**                            Floating point precision (single/double)
 ``WarpX_PARTICLE_PRECISION``  SINGLE/**DOUBLE**                            Particle floating point precision (single/double), defaults to WarpX_PRECISION value if not set
 ``WarpX_FFT``                 ON/**OFF**                                   FFT-based solvers
+``WarpX_HEFFTE``              ON/**OFF**                                   Multi-Node FFT-based solvers
 ``WarpX_PYTHON``              ON/**OFF**                                   Python bindings
 ``WarpX_QED``                 **ON**/OFF                                   QED support (requires PICSAR)
 ``WarpX_QED_TABLE_GEN``       ON/**OFF**                                   QED table generation support (requires PICSAR and Boost)
@@ -271,6 +272,7 @@ Environment Variable          Default & Values                             Descr
 ``WARPX_PRECISION``           SINGLE/**DOUBLE**                            Floating point precision (single/double)
 ``WARPX_PARTICLE_PRECISION``  SINGLE/**DOUBLE**                            Particle floating point precision (single/double), defaults to WarpX_PRECISION value if not set
 ``WARPX_FFT``                 ON/**OFF**                                   FFT-based solvers
+``WARPX_HEFFTE``              ON/**OFF**                                   Multi-Node FFT-based solvers
 ``WARPX_QED``                 **ON**/OFF                                   PICSAR QED (requires PICSAR)
 ``WARPX_QED_TABLE_GEN``       ON/**OFF**                                   QED table generation (requires PICSAR and Boost)
 ``BUILD_PARALLEL``            ``2``                                        Number of threads to use for parallel builds
diff --git a/Docs/source/install/dependencies.rst b/Docs/source/install/dependencies.rst
index ce9f9dca520..3bab32b7502 100644
--- a/Docs/source/install/dependencies.rst
+++ b/Docs/source/install/dependencies.rst
@@ -23,12 +23,13 @@ Optional dependencies include:
 - for on-node accelerated compute *one of either*:
 
   - `OpenMP 3.1+ <https://www.openmp.org>`__: for threaded CPU execution or
-  - `CUDA Toolkit 11.7+ <https://developer.nvidia.com/cuda-downloads>`__: for Nvidia GPU support (see `matching host-compilers <https://gist.github.com/ax3l/9489132>`_) or
+  - `CUDA Toolkit 11.7+ <https://developer.nvidia.com/cuda-downloads>`__: for Nvidia GPU support (see `matching host-compilers <https://gist.github.com/ax3l/9489132>`__) or
   - `ROCm 5.2+ (5.5+ recommended) <https://gpuopen.com/learn/amd-lab-notes/amd-lab-notes-rocm-installation-readme/>`__: for AMD GPU support
-- `FFTW3 <http://www.fftw.org>`_: for spectral solver (PSATD) support when running on CPU or SYCL
+- `FFTW3 <http://www.fftw.org>`__: for spectral solver (PSATD or IGF) support when running on CPU or SYCL
 
   - also needs the ``pkg-config`` tool on Unix
-- `BLAS++ <https://github.com/icl-utk-edu/blaspp>`_ and `LAPACK++ <https://github.com/icl-utk-edu/lapackpp>`_: for spectral solver (PSATD) support in RZ geometry
+- `heFFTe 2.4.0+ <https://github.com/icl-utk-edu/heffte>`__: for multi-node spectral solver (IGF) support
+- `BLAS++ <https://github.com/icl-utk-edu/blaspp>`__ and `LAPACK++ <https://github.com/icl-utk-edu/lapackpp>`__: for spectral solver (PSATD) support in RZ geometry
 - `Boost 1.66.0+ <https://www.boost.org/>`__: for QED lookup tables generation support
 - `openPMD-api 0.15.1+ <https://github.com/openPMD/openPMD-api>`__: we automatically download and compile a copy of openPMD-api for openPMD I/O support
 
diff --git a/Docs/source/install/hpc/perlmutter.rst b/Docs/source/install/hpc/perlmutter.rst
index 9612b64476d..dc5a985e99f 100644
--- a/Docs/source/install/hpc/perlmutter.rst
+++ b/Docs/source/install/hpc/perlmutter.rst
@@ -153,7 +153,7 @@ Use the following :ref:`cmake commands <building-cmake>` to compile the applicat
          cd $HOME/src/warpx
          rm -rf build_pm_gpu
 
-         cmake -S . -B build_pm_gpu -DWarpX_COMPUTE=CUDA -DWarpX_FFT=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake -S . -B build_pm_gpu -DWarpX_COMPUTE=CUDA -DWarpX_FFT=ON -DWarpX_HEFFTE=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
          cmake --build build_pm_gpu -j 16
 
       The WarpX application executables are now in ``$HOME/src/warpx/build_pm_gpu/bin/``.
@@ -164,7 +164,7 @@ Use the following :ref:`cmake commands <building-cmake>` to compile the applicat
          cd $HOME/src/warpx
          rm -rf build_pm_gpu_py
 
-         cmake -S . -B build_pm_gpu_py -DWarpX_COMPUTE=CUDA -DWarpX_FFT=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake -S . -B build_pm_gpu_py -DWarpX_COMPUTE=CUDA -DWarpX_FFT=ON -DWarpX_HEFFTE=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
          cmake --build build_pm_gpu_py -j 16 --target pip_install
 
    .. tab-item:: CPU Nodes
@@ -174,7 +174,7 @@ Use the following :ref:`cmake commands <building-cmake>` to compile the applicat
          cd $HOME/src/warpx
          rm -rf build_pm_cpu
 
-         cmake -S . -B build_pm_cpu -DWarpX_COMPUTE=OMP -DWarpX_FFT=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake -S . -B build_pm_cpu -DWarpX_COMPUTE=OMP -DWarpX_FFT=ON -DWarpX_HEFFTE=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_DIMS="1;2;RZ;3"
          cmake --build build_pm_cpu -j 16
 
       The WarpX application executables are now in ``$HOME/src/warpx/build_pm_cpu/bin/``.
@@ -184,7 +184,7 @@ Use the following :ref:`cmake commands <building-cmake>` to compile the applicat
 
          rm -rf build_pm_cpu_py
 
-         cmake -S . -B build_pm_cpu_py -DWarpX_COMPUTE=OMP -DWarpX_FFT=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
+         cmake -S . -B build_pm_cpu_py -DWarpX_COMPUTE=OMP -DWarpX_FFT=ON -DWarpX_HEFFTE=ON -DWarpX_QED_TABLE_GEN=ON -DWarpX_APP=OFF -DWarpX_PYTHON=ON -DWarpX_DIMS="1;2;RZ;3"
          cmake --build build_pm_cpu_py -j 16 --target pip_install
 
 Now, you can :ref:`submit Perlmutter compute jobs <running-cpp-perlmutter>` for WarpX :ref:`Python (PICMI) scripts <usage-picmi>` (:ref:`example scripts <usage-examples>`).
diff --git a/Tools/machines/desktop/spack-macos-openmp.yaml b/Tools/machines/desktop/spack-macos-openmp.yaml
index 820cf7069fd..3ea78625b78 100644
--- a/Tools/machines/desktop/spack-macos-openmp.yaml
+++ b/Tools/machines/desktop/spack-macos-openmp.yaml
@@ -23,6 +23,7 @@ spack:
   - conduit ~fortran
   - fftw
   - hdf5 ~fortran
+  - heffte ~cuda +fftw
   - lapackpp ~cuda ~rocm ^blaspp ~cuda +openmp ~rocm
   - mpi
   - llvm-openmp
diff --git a/Tools/machines/desktop/spack-ubuntu-cuda.yaml b/Tools/machines/desktop/spack-ubuntu-cuda.yaml
index 08d0c95ee4b..19b9ae12e24 100644
--- a/Tools/machines/desktop/spack-ubuntu-cuda.yaml
+++ b/Tools/machines/desktop/spack-ubuntu-cuda.yaml
@@ -25,6 +25,7 @@ spack:
   - cuda
   - fftw
   - hdf5
+  - heffte
   - lapackpp
   - mpi
   - pkgconfig
diff --git a/Tools/machines/desktop/spack-ubuntu-openmp.yaml b/Tools/machines/desktop/spack-ubuntu-openmp.yaml
index b658f1e009d..1eb7d4074a7 100644
--- a/Tools/machines/desktop/spack-ubuntu-openmp.yaml
+++ b/Tools/machines/desktop/spack-ubuntu-openmp.yaml
@@ -22,6 +22,7 @@ spack:
   - ecp-data-vis-sdk +adios2 +ascent +hdf5 +sensei
   - fftw
   - hdf5
+  - heffte ~cuda +fftw
   - lapackpp ~cuda ~rocm ^blaspp ~cuda +openmp ~rocm
   - mpi
   - pkgconfig
diff --git a/Tools/machines/desktop/spack-ubuntu-rocm.yaml b/Tools/machines/desktop/spack-ubuntu-rocm.yaml
index 45c9b0f776e..7eee1baa13c 100644
--- a/Tools/machines/desktop/spack-ubuntu-rocm.yaml
+++ b/Tools/machines/desktop/spack-ubuntu-rocm.yaml
@@ -21,6 +21,7 @@ spack:
   - cmake
   - ecp-data-vis-sdk +adios2 +ascent +hdf5 +sensei
   - hdf5
+  - heffte
   - hip
   - lapackpp
   - llvm-amdgpu
diff --git a/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh b/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh
index f65e43891d0..a5a7c28b85e 100755
--- a/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh
+++ b/Tools/machines/perlmutter-nersc/install_cpu_dependencies.sh
@@ -109,6 +109,45 @@ CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B
 cmake --build ${build_dir}/lapackpp-pm-cpu-build --target install --parallel 16
 rm -rf ${build_dir}/lapackpp-pm-cpu-build
 
+# heFFTe (v2.4.0): fetch or update the sources, then clear any stale build directory
+if [ -d $HOME/src/heffte ]
+then
+  cd $HOME/src/heffte
+  git fetch --prune
+  git checkout v2.4.0
+  # no "git pull" here: v2.4.0 is a tag (detached HEAD), so there is no branch to pull
+  cd -
+else
+  git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte
+fi
+rm -rf ${build_dir}/heffte-pm-cpu-build
+cmake \
+    -S ${HOME}/src/heffte               \
+    -B ${build_dir}/heffte-pm-cpu-build \
+    -DBUILD_SHARED_LIBS=ON              \
+    -DCMAKE_BUILD_TYPE=Release          \
+    -DCMAKE_CXX_STANDARD=17             \
+    -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON  \
+    -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0  \
+    -DHeffte_DISABLE_GPU_AWARE_MPI=ON   \
+    -DHeffte_ENABLE_AVX=ON              \
+    -DHeffte_ENABLE_AVX512=OFF          \
+    -DHeffte_ENABLE_FFTW=ON             \
+    -DHeffte_ENABLE_CUDA=OFF            \
+    -DHeffte_ENABLE_ROCM=OFF            \
+    -DHeffte_ENABLE_ONEAPI=OFF          \
+    -DHeffte_ENABLE_MKL=OFF             \
+    -DHeffte_ENABLE_DOXYGEN=OFF         \
+    -DHeffte_SEQUENTIAL_TESTING=OFF     \
+    -DHeffte_ENABLE_TESTING=OFF         \
+    -DHeffte_ENABLE_TRACING=OFF         \
+    -DHeffte_ENABLE_PYTHON=OFF          \
+    -DHeffte_ENABLE_FORTRAN=OFF         \
+    -DHeffte_ENABLE_SWIG=OFF            \
+    -DHeffte_ENABLE_MAGMA=OFF
+cmake --build ${build_dir}/heffte-pm-cpu-build --target install --parallel 16
+rm -rf ${build_dir}/heffte-pm-cpu-build
+
 
 # Python ######################################################################
 #
diff --git a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
index 9ac5800c6ce..125c63104be 100755
--- a/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
+++ b/Tools/machines/perlmutter-nersc/install_gpu_dependencies.sh
@@ -109,6 +109,46 @@ CXX=$(which CC) CXXFLAGS="-DLAPACK_FORTRAN_ADD_" cmake -S $HOME/src/lapackpp -B
 cmake --build ${build_dir}/lapackpp-pm-gpu-build --target install --parallel 16
 rm -rf ${build_dir}/lapackpp-pm-gpu-build
 
+# heFFTe
+if [ -d $HOME/src/heffte ]
+then
+  cd $HOME/src/heffte
+  git fetch --prune
+  git checkout v2.4.0
+  cd -
+else
+  git clone -b v2.4.0 https://github.com/icl-utk-edu/heffte.git ${HOME}/src/heffte
+fi
+# start from a clean build directory (the one passed to "cmake -B" below)
+rm -rf ${build_dir}/heffte-pm-gpu-build
+# GPU configuration: CUDA backend on; FFTW/ROCm/oneAPI/MKL backends off
+cmake \
+    -S ${HOME}/src/heffte               \
+    -B ${build_dir}/heffte-pm-gpu-build \
+    -DBUILD_SHARED_LIBS=ON              \
+    -DCMAKE_BUILD_TYPE=Release          \
+    -DCMAKE_CXX_STANDARD=17             \
+    -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON  \
+    -DCMAKE_INSTALL_PREFIX=${SW_DIR}/heffte-2.4.0  \
+    -DHeffte_DISABLE_GPU_AWARE_MPI=OFF  \
+    -DHeffte_ENABLE_AVX=OFF             \
+    -DHeffte_ENABLE_AVX512=OFF          \
+    -DHeffte_ENABLE_FFTW=OFF            \
+    -DHeffte_ENABLE_CUDA=ON             \
+    -DHeffte_ENABLE_ROCM=OFF            \
+    -DHeffte_ENABLE_ONEAPI=OFF          \
+    -DHeffte_ENABLE_MKL=OFF             \
+    -DHeffte_ENABLE_DOXYGEN=OFF         \
+    -DHeffte_SEQUENTIAL_TESTING=OFF     \
+    -DHeffte_ENABLE_TESTING=OFF         \
+    -DHeffte_ENABLE_TRACING=OFF         \
+    -DHeffte_ENABLE_PYTHON=OFF          \
+    -DHeffte_ENABLE_FORTRAN=OFF         \
+    -DHeffte_ENABLE_SWIG=OFF            \
+    -DHeffte_ENABLE_MAGMA=OFF
+cmake --build ${build_dir}/heffte-pm-gpu-build --target install --parallel 16
+rm -rf ${build_dir}/heffte-pm-gpu-build
+
 
 # Python ######################################################################
 #
diff --git a/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example b/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example
index 1b0ac3182d5..3fea9b3aa39 100644
--- a/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example
+++ b/Tools/machines/perlmutter-nersc/perlmutter_cpu_warpx.profile.example
@@ -19,11 +19,13 @@ export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/c-blosc-1.21.1
 export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
 export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/blaspp-master:$CMAKE_PREFIX_PATH
 export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/lapackpp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/heffte-2.4.0:$CMAKE_PREFIX_PATH
 
 export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/blaspp-master/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/lapackpp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/heffte-2.4.0/lib64:$LD_LIBRARY_PATH
 
 export PATH=${CFS}/${proj}/${USER}/sw/perlmutter/cpu/adios2-2.8.3/bin:${PATH}
 
diff --git a/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example b/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example
index 759df0b923a..9e1465d6c02 100644
--- a/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example
+++ b/Tools/machines/perlmutter-nersc/perlmutter_gpu_warpx.profile.example
@@ -23,11 +23,13 @@ export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/c-blosc-1.2
 export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3:$CMAKE_PREFIX_PATH
 export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/blaspp-master:$CMAKE_PREFIX_PATH
 export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/lapackpp-master:$CMAKE_PREFIX_PATH
+export CMAKE_PREFIX_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/heffte-2.4.0:$CMAKE_PREFIX_PATH
 
 export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/c-blosc-1.21.1/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/blaspp-master/lib64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/lapackpp-master/lib64:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/heffte-2.4.0/lib64:$LD_LIBRARY_PATH
 
 export PATH=${CFS}/${proj%_g}/${USER}/sw/perlmutter/gpu/adios2-2.8.3/bin:${PATH}
 
diff --git a/cmake/WarpXFunctions.cmake b/cmake/WarpXFunctions.cmake
index a68ebca6c60..c12cedca3b2 100644
--- a/cmake/WarpXFunctions.cmake
+++ b/cmake/WarpXFunctions.cmake
@@ -453,6 +453,7 @@ function(warpx_print_summary)
     message("    PARTICLE PRECISION: ${WarpX_PARTICLE_PRECISION}")
     message("    PRECISION: ${WarpX_PRECISION}")
     message("    FFT Solvers: ${WarpX_FFT}")
+    message("    heFFTe: ${WarpX_HEFFTE}")
     message("    PYTHON: ${WarpX_PYTHON}")
     if(WarpX_PYTHON)
         message("    PYTHON IPO: ${WarpX_PYTHON_IPO}")
diff --git a/setup.py b/setup.py
index 670e376986f..6ad6fd81960 100644
--- a/setup.py
+++ b/setup.py
@@ -97,6 +97,7 @@ def build_extension(self, ext):
             '-DWarpX_PRECISION=' + WARPX_PRECISION,
             '-DWarpX_PARTICLE_PRECISION=' + WARPX_PARTICLE_PRECISION,
             '-DWarpX_FFT:BOOL=' + WARPX_FFT,
+            '-DWarpX_HEFFTE:BOOL=' + WARPX_HEFFTE,
             '-DWarpX_PYTHON:BOOL=ON',
             '-DWarpX_PYTHON_IPO:BOOL=' + WARPX_PYTHON_IPO,
             '-DWarpX_QED:BOOL=' + WARPX_QED,
@@ -206,6 +207,7 @@ def build_extension(self, ext):
 WARPX_PRECISION = env.pop('WARPX_PRECISION', 'DOUBLE')
 WARPX_PARTICLE_PRECISION = env.pop('WARPX_PARTICLE_PRECISION', WARPX_PRECISION)
 WARPX_FFT = env.pop('WARPX_FFT', 'OFF')
+WARPX_HEFFTE = env.pop('WARPX_HEFFTE', 'OFF')
 WARPX_QED = env.pop('WARPX_QED', 'ON')
 WARPX_QED_TABLE_GEN = env.pop('WARPX_QED_TABLE_GEN', 'OFF')
 WARPX_DIMS = env.pop('WARPX_DIMS', '1;2;RZ;3')

From 828332089e58ef7686a8a8753062eb505514cd7e Mon Sep 17 00:00:00 2001
From: Axel Huebl <axel.huebl@plasma.ninja>
Date: Tue, 18 Jun 2024 00:25:51 -0700
Subject: [PATCH 11/11] Resetting collisionXYZ Temperature (#4999)

Reset the T_electron and T_ion reference values in the collisionXYZ checksum
benchmark after a recent change altered them. Might need more investigation.
---
 Regression/Checksum/benchmarks_json/collisionXYZ.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Regression/Checksum/benchmarks_json/collisionXYZ.json b/Regression/Checksum/benchmarks_json/collisionXYZ.json
index 6e4b9abf965..c87bbd98e42 100644
--- a/Regression/Checksum/benchmarks_json/collisionXYZ.json
+++ b/Regression/Checksum/benchmarks_json/collisionXYZ.json
@@ -6,7 +6,7 @@
     "Ex": 0.0,
     "Ey": 0.0,
     "Ez": 0.0,
-    "T_electron": 362230.52300397365,
-    "T_ion": 338312.83502136066
+    "T_electron": 358778.6506903592,
+    "T_ion": 341562.3085776466
   }
 }