diff --git a/src/core/electrostatics/coulomb.cpp b/src/core/electrostatics/coulomb.cpp index 14282a8b17..cd5dfa8766 100644 --- a/src/core/electrostatics/coulomb.cpp +++ b/src/core/electrostatics/coulomb.cpp @@ -145,7 +145,7 @@ Solver::calc_pressure_long_range(ParticleRange const &particles) const { struct ShortRangeCutoff { #ifdef P3M auto operator()(std::shared_ptr const &actor) const { - return actor->p3m.params.r_cut; + return actor->p3m_params.r_cut; } auto operator()(std::shared_ptr const &actor) const { diff --git a/src/core/electrostatics/elc.cpp b/src/core/electrostatics/elc.cpp index 28f4f5717b..a7988e84db 100644 --- a/src/core/electrostatics/elc.cpp +++ b/src/core/electrostatics/elc.cpp @@ -1011,7 +1011,8 @@ void ElectrostaticLayerCorrection::adapt_solver() { std::visit( [this](auto &solver) { set_prefactor(solver->prefactor); - solver->p3m.params.epsilon = P3M_EPSILON_METALLIC; + solver->adapt_epsilon_elc(); + assert(solver->p3m_params.epsilon == P3M_EPSILON_METALLIC); }, base_solver); } @@ -1031,7 +1032,7 @@ void ElectrostaticLayerCorrection::recalc_box_h() { void ElectrostaticLayerCorrection::recalc_space_layer() { if (elc.dielectric_contrast_on) { auto const p3m_r_cut = std::visit( - [](auto &solver) { return solver->p3m.params.r_cut; }, base_solver); + [](auto &solver) { return solver->p3m_params.r_cut; }, base_solver); // recalculate the space layer size: // 1. set the space_layer to be 1/3 of the gap size, so that box = layer elc.space_layer = (1. / 3.) * elc.gap_size; diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index 2475b4954c..08cef7bd74 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -90,6 +90,10 @@ #include #include +#ifdef FFTW3_H +#error "The FFTW3 library shouldn't be visible in this translation unit" +#endif + template void CoulombP3MImpl::count_charged_particles() { auto local_n = 0; @@ -262,8 +266,11 @@ void CoulombP3MImpl::init_cpu_kernels() { assert(p3m.fft); p3m.local_mesh.calc_local_ca_mesh(p3m.params, local_geo, skin, elc_layer); - p3m.fft->init_halo(); - p3m.fft->init_fft(); + p3m.fft_buffers->init_halo(); + p3m.fft->init(p3m.params); + p3m.mesh.ks_pnum = p3m.fft->get_ks_pnum(); + p3m.fft_buffers->init_meshes(p3m.fft->get_ca_mesh_size()); + p3m.update_mesh_views(); p3m.calc_differential_operator(); /* fix box length dependent constants */ @@ -391,8 +398,9 @@ Utils::Vector9d CoulombP3MImpl::long_range_pressure( if (p3m.sum_q2 > 0.) { charge_assign(particles); - p3m.fft->perform_scalar_halo_gather(); - p3m.fft->perform_scalar_fwd_fft(); + p3m.fft_buffers->perform_scalar_halo_gather(); + p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh()); + p3m.update_mesh_views(); auto constexpr mesh_start = Utils::Vector3i::broadcast(0); auto const &mesh_stop = p3m.mesh.size; @@ -457,8 +465,9 @@ double CoulombP3MImpl::long_range_kernel( system.coulomb.impl->solver)) { charge_assign(particles); } - p3m.fft->perform_scalar_halo_gather(); - p3m.fft->perform_scalar_fwd_fft(); + p3m.fft_buffers->perform_scalar_halo_gather(); + p3m.fft->forward_fft(p3m.fft_buffers->get_scalar_mesh()); + p3m.update_mesh_views(); } auto p_q_range = ParticlePropertyRange::charge_range(particles); @@ -515,8 +524,10 @@ double CoulombP3MImpl::long_range_kernel( auto const check_residuals = not p3m.params.tuning and check_complex_residuals; p3m.fft->check_complex_residuals = check_residuals; - p3m.fft->perform_vector_back_fft(); - p3m.fft->perform_vector_halo_spread(); + for (auto &rs_mesh : p3m.fft_buffers->get_vector_mesh()) { + p3m.fft->backward_fft(rs_mesh); + } + p3m.fft_buffers->perform_vector_halo_spread(); p3m.fft->check_complex_residuals = false; auto const force_prefac = prefactor / volume; @@ -785,25 +796,25 @@ void CoulombP3M::sanity_checks_boxl() const { auto const &local_geo = *system.local_geo; for (auto i = 0u; i < 3u; i++) { /* check k-space cutoff */ - if (p3m.params.cao_cut[i] >= box_geo.length_half()[i]) { + if (p3m_params.cao_cut[i] >= box_geo.length_half()[i]) { std::stringstream msg; - msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i] + msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i] << " is larger than half of box dimension " << box_geo.length()[i]; throw std::runtime_error(msg.str()); } - if (p3m.params.cao_cut[i] >= local_geo.length()[i]) { + if (p3m_params.cao_cut[i] >= local_geo.length()[i]) { std::stringstream msg; - msg << "P3M_init: k-space cutoff " << p3m.params.cao_cut[i] + msg << "P3M_init: k-space cutoff " << p3m_params.cao_cut[i] << " is larger than local box dimension " << local_geo.length()[i]; throw std::runtime_error(msg.str()); } } - if (p3m.params.epsilon != P3M_EPSILON_METALLIC) { + if (p3m_params.epsilon != P3M_EPSILON_METALLIC) { if ((box_geo.length()[0] != box_geo.length()[1]) or (box_geo.length()[1] != box_geo.length()[2]) or - (p3m.params.mesh[0] != p3m.params.mesh[1]) or - (p3m.params.mesh[1] != p3m.params.mesh[2])) { + (p3m_params.mesh[0] != p3m_params.mesh[1]) or + (p3m_params.mesh[1] != p3m_params.mesh[2])) { throw std::runtime_error( "CoulombP3M: non-metallic epsilon requires cubic box"); } @@ -841,7 +852,8 @@ void CoulombP3M::sanity_checks_node_grid() const { } } -void CoulombP3M::scaleby_box_l() { +template +void CoulombP3MImpl::scaleby_box_l() { auto const &box_geo = *get_system().box_geo; p3m.params.r_cut = p3m.params.r_cut_iL * box_geo.length()[0]; p3m.params.alpha = p3m.params.alpha_L * box_geo.length_inv()[0]; diff --git a/src/core/electrostatics/p3m.hpp b/src/core/electrostatics/p3m.hpp index f7d7fb1995..773e2da83b 100644 --- a/src/core/electrostatics/p3m.hpp +++ b/src/core/electrostatics/p3m.hpp @@ -50,38 +50,19 @@ #include #include -#include /** @brief P3M solver. */ struct CoulombP3M : public Coulomb::Actor { - /** @brief Coulomb P3M parameters. */ - p3m_data_struct &p3m; - - int tune_timings; - bool tune_verbose; - bool check_complex_residuals; - -protected: - bool m_is_tuned; + P3MParameters const &p3m_params; public: - CoulombP3M(p3m_data_struct &p3m_data, double prefactor, int tune_timings, - bool tune_verbose, bool check_complex_residuals) - : p3m{p3m_data}, tune_timings{tune_timings}, tune_verbose{tune_verbose}, - check_complex_residuals{check_complex_residuals} { - - if (tune_timings <= 0) { - throw std::domain_error("Parameter 'timings' must be > 0"); - } - m_is_tuned = not p3m.params.tuning; - p3m.params.tuning = false; - set_prefactor(prefactor); - } + CoulombP3M(P3MParameters const &p3m_params) : p3m_params{p3m_params} {} virtual ~CoulombP3M() = default; - [[nodiscard]] bool is_tuned() const { return m_is_tuned; } + [[nodiscard]] virtual bool is_tuned() const noexcept = 0; [[nodiscard]] virtual bool is_gpu() const noexcept = 0; + [[nodiscard]] virtual bool is_double_precision() const noexcept = 0; /** @brief Recalculate all derived parameters. */ virtual void init() = 0; @@ -109,6 +90,7 @@ struct CoulombP3M : public Coulomb::Actor { */ virtual void count_charged_particles() = 0; virtual void count_charged_particles_elc(int, double, double) = 0; + virtual void adapt_epsilon_elc() = 0; /** * @brief Tune P3M parameters to desired accuracy. @@ -119,7 +101,7 @@ struct CoulombP3M : public Coulomb::Actor { * @ref P3MParameters::r_cut_iL "r_cut_iL" and * @ref P3MParameters::alpha_L "alpha_L" are tuned to obtain the target * @ref P3MParameters::accuracy "accuracy" in optimal time. - * These parameters are stored in the @ref p3m object. + * These parameters are stored in the @ref p3m_params object. * * The function utilizes the analytic expression of the error estimate * for the P3M method in @cite hockney88a (eq. (8.23)) in @@ -163,10 +145,10 @@ struct CoulombP3M : public Coulomb::Actor { /** Calculate real-space contribution of p3m Coulomb pair forces. */ Utils::Vector3d pair_force(double q1q2, Utils::Vector3d const &d, double dist) const { - if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) { + if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) { return {}; } - auto const alpha = p3m.params.alpha; + auto const alpha = p3m_params.alpha; auto const adist = alpha * dist; auto const exp_adist_sq = exp(-adist * adist); auto const dist_sq = dist * dist; @@ -184,10 +166,10 @@ struct CoulombP3M : public Coulomb::Actor { /** Calculate real-space contribution of Coulomb pair energy. */ // Eq. (3.6) @cite deserno00b double pair_energy(double q1q2, double dist) const { - if ((q1q2 == 0.) || dist >= p3m.params.r_cut || dist <= 0.) { + if ((q1q2 == 0.) || dist >= p3m_params.r_cut || dist <= 0.) { return {}; } - auto const adist = p3m.params.alpha * dist; + auto const adist = p3m_params.alpha * dist; #if USE_ERFC_APPROXIMATION auto const erfc_part_ri = Utils::AS_erfc_part(adist) / dist; return prefactor * q1q2 * erfc_part_ri * exp(-adist * adist); @@ -216,7 +198,7 @@ struct CoulombP3M : public Coulomb::Actor { void sanity_checks_periodicity() const; void sanity_checks_cell_structure() const; - virtual void scaleby_box_l(); + virtual void scaleby_box_l() = 0; }; #endif // P3M diff --git a/src/core/electrostatics/p3m.impl.hpp b/src/core/electrostatics/p3m.impl.hpp index 7ac688ab30..8f8fb6d92f 100644 --- a/src/core/electrostatics/p3m.impl.hpp +++ b/src/core/electrostatics/p3m.impl.hpp @@ -19,19 +19,6 @@ * along with this program. If not, see . */ -/** @file - * P3M algorithm for long-range Coulomb interaction. - * - * We use a P3M (Particle-Particle Particle-Mesh) method based on the - * Ewald summation. Details of the used method can be found in - * @cite hockney88a and @cite deserno98a @cite deserno98b. - * - * Further reading: @cite ewald21a, @cite hockney88a, @cite deserno98a, - * @cite deserno98b, @cite deserno00e, @cite deserno00b, @cite cerda08d. - * - * Implementation in p3m.cpp. - */ - #pragma once #include "config/config.hpp" @@ -49,11 +36,13 @@ #include #include +#include +#include #include template -struct p3m_data_struct_coulomb : public p3m_data_struct_fft { - using p3m_data_struct_fft::p3m_data_struct_fft; +struct p3m_data_struct_coulomb : public p3m_data_struct { + using p3m_data_struct::p3m_data_struct; /** number of charged particles (only on head node). */ int sum_qpart = 0; @@ -73,17 +62,34 @@ template struct CoulombP3MImpl : public CoulombP3M { ~CoulombP3MImpl() override = default; + /** @brief Coulomb P3M parameters. */ + p3m_data_struct_coulomb &p3m; + +private: /** @brief Coulomb P3M meshes and FFT algorithm. */ std::unique_ptr> p3m_impl; - // this member overshadows its own base class pointer in the parent class - p3m_data_struct_coulomb &p3m; + int tune_timings; + bool tune_verbose; + bool check_complex_residuals; + bool m_is_tuned; - template +public: CoulombP3MImpl( std::unique_ptr> &&p3m_handle, - Args &&...args) - : CoulombP3M(*p3m_handle, std::forward(args)...), - p3m_impl{std::move(p3m_handle)}, p3m{*p3m_impl} {} + double prefactor, int tune_timings, bool tune_verbose, + bool check_complex_residuals) + : CoulombP3M(p3m_handle->params), p3m{*p3m_handle}, + p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings}, + tune_verbose{tune_verbose}, + check_complex_residuals{check_complex_residuals} { + + if (tune_timings <= 0) { + throw std::domain_error("Parameter 'timings' must be > 0"); + } + m_is_tuned = not p3m.params.tuning; + p3m.params.tuning = false; + set_prefactor(prefactor); + } void init() override { if constexpr (Architecture == Arch::CPU) { @@ -103,10 +109,17 @@ struct CoulombP3MImpl : public CoulombP3M { p3m.sum_q2 = sum_q2; p3m.square_sum_q = square_sum_q; } + void adapt_epsilon_elc() override { + p3m.params.epsilon = P3M_EPSILON_METALLIC; + } + [[nodiscard]] bool is_tuned() const noexcept override { return m_is_tuned; } [[nodiscard]] bool is_gpu() const noexcept override { return Architecture == Arch::GPU; } + [[nodiscard]] bool is_double_precision() const noexcept override { + return std::is_same_v; + } void on_activation() override { #ifdef CUDA @@ -160,6 +173,7 @@ struct CoulombP3MImpl : public CoulombP3M { ParticleRange const &particles); void calc_influence_function_force() override; void calc_influence_function_energy() override; + void scaleby_box_l() override; void init_cpu_kernels(); #ifdef CUDA void init_gpu_kernels(); @@ -170,13 +184,15 @@ struct CoulombP3MImpl : public CoulombP3M { }; template class FFTBackendImpl, class... Args> + template class FFTBackendImpl, + template class P3MFFTMeshImpl, class... Args> std::shared_ptr new_p3m_handle(P3MParameters &&p3m, Args &&...args) { auto obj = std::make_shared>( std::make_unique>(std::move(p3m)), std::forward(args)...); - obj->p3m.template make_fft_instance>(false); + obj->p3m.template make_mesh_instance>(); + obj->p3m.template make_fft_instance>(); return obj; } diff --git a/src/core/fft/fft.cpp b/src/core/fft/fft.cpp index 9dad7b9aa5..89b05bc386 100644 --- a/src/core/fft/fft.cpp +++ b/src/core/fft/fft.cpp @@ -28,6 +28,8 @@ #include "fft.hpp" #include "vector.hpp" +#include "p3m/packing.hpp" + #include #include #include @@ -394,7 +396,7 @@ void fft_data_struct::forw_grid_comm( FloatType const *in, FloatType *out) { for (std::size_t i = 0ul; i < plan.group.size(); i++) { plan.pack_function(in, send_buf.data(), &(plan.send_block[6ul * i]), - &(plan.send_block[6ul * i + 3ul]), plan.old_mesh, + &(plan.send_block[6ul * i + 3ul]), plan.old_mesh.data(), plan.element); if (plan.group[i] != comm.rank()) { @@ -405,7 +407,7 @@ void fft_data_struct::forw_grid_comm( std::swap(send_buf, recv_buf); } fft_unpack_block(recv_buf.data(), out, &(plan.recv_block[6ul * i]), - &(plan.recv_block[6ul * i + 3ul]), plan.new_mesh, + &(plan.recv_block[6ul * i + 3ul]), plan.new_mesh.data(), plan.element); } } @@ -429,8 +431,8 @@ void fft_data_struct::back_grid_comm( for (std::size_t i = 0ul; i < plan_f.group.size(); i++) { plan_b.pack_function(in, send_buf.data(), &(plan_f.recv_block[6ul * i]), - &(plan_f.recv_block[6ul * i + 3ul]), plan_f.new_mesh, - plan_f.element); + &(plan_f.recv_block[6ul * i + 3ul]), + plan_f.new_mesh.data(), plan_f.element); if (plan_f.group[i] != comm.rank()) { /* send first, receive second */ fft_sendrecv(send_buf.data(), plan_f.recv_size[i], plan_f.group[i], @@ -440,8 +442,8 @@ void fft_data_struct::back_grid_comm( std::swap(send_buf, recv_buf); } fft_unpack_block(recv_buf.data(), out, &(plan_f.send_block[6ul * i]), - &(plan_f.send_block[6ul * i + 3ul]), plan_f.old_mesh, - plan_f.element); + &(plan_f.send_block[6ul * i + 3ul]), + plan_f.old_mesh.data(), plan_f.element); } } @@ -596,9 +598,9 @@ int fft_data_struct::initialize_fft( forw[i].new_size = calc_local_mesh( my_pos[i], n_grid[i], global_mesh_dim.data(), global_mesh_off.data(), - forw[i].new_mesh, forw[i].start); - permute_ifield(forw[i].new_mesh, 3, -(forw[i].n_permute)); - permute_ifield(forw[i].start, 3, -(forw[i].n_permute)); + forw[i].new_mesh.data(), forw[i].start.data()); + permute_ifield(forw[i].new_mesh.data(), 3, -(forw[i].n_permute)); + permute_ifield(forw[i].start.data(), 3, -(forw[i].n_permute)); forw[i].n_ffts = forw[i].new_mesh[0] * forw[i].new_mesh[1]; /* === send/recv block specifications === */ @@ -782,58 +784,6 @@ void fft_data_struct::backward_fft( /* REMARK: Result has to be in data. */ } -template -void fft_pack_block(FloatType const *const in, FloatType *const out, - int const start[3], int const size[3], int const dim[3], - int element) { - - auto const copy_size = - static_cast(element * size[2]) * sizeof(FloatType); - /* offsets for indices in input grid */ - auto const m_in_offset = element * dim[2]; - auto const s_in_offset = element * (dim[2] * (dim[1] - size[1])); - /* offsets for indices in output grid */ - auto const m_out_offset = element * size[2]; - /* linear index of input grid, linear index of output grid */ - int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0])); - int li_out = 0; - - for (int s = 0; s < size[0]; s++) { - for (int m = 0; m < size[1]; m++) { - memmove(&(out[li_out]), &(in[li_in]), copy_size); - li_in += m_in_offset; - li_out += m_out_offset; - } - li_in += s_in_offset; - } -} - -template -void fft_unpack_block(FloatType const *const in, FloatType *const out, - int const start[3], int const size[3], int const dim[3], - int element) { - - auto const copy_size = - static_cast(element * size[2]) * sizeof(FloatType); - /* offsets for indices in output grid */ - auto const m_out_offset = element * dim[2]; - auto const s_out_offset = element * (dim[2] * (dim[1] - size[1])); - /* offset for indices in input grid */ - auto const m_in_offset = element * size[2]; - /* linear index of in grid, linear index of out grid */ - int li_in = 0; - int li_out = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0])); - - for (int s = 0; s < size[0]; s++) { - for (int m = 0; m < size[1]; m++) { - memmove(&(out[li_out]), &(in[li_in]), copy_size); - li_in += m_in_offset; - li_out += m_out_offset; - } - li_out += s_out_offset; - } -} - template void fft_plan::destroy_plan() { if (plan_handle) { fftw::destroy_plan(plan_handle); @@ -877,17 +827,4 @@ template struct fft_back_plan; template struct fft_data_struct; template struct fft_data_struct; -template void fft_pack_block(float const *in, float *out, - int const start[3], int const size[3], - int const dim[3], int element); -template void fft_pack_block(double const *in, double *out, - int const start[3], int const size[3], - int const dim[3], int element); -template void fft_unpack_block(float const *in, float *out, - int const start[3], int const size[3], - int const dim[3], int element); -template void fft_unpack_block(double const *in, double *out, - int const start[3], int const size[3], - int const dim[3], int element); - } // namespace fft diff --git a/src/core/fft/fft.hpp b/src/core/fft/fft.hpp index 5c3afe58d1..a462a958c3 100644 --- a/src/core/fft/fft.hpp +++ b/src/core/fft/fft.hpp @@ -88,11 +88,11 @@ struct fft_forw_plan : public fft_plan { int n_ffts; /** size of local mesh before communication. */ - int old_mesh[3]; + std::array old_mesh; /** size of local mesh after communication, also used for actual FFT. */ - int new_mesh[3]; + std::array new_mesh; /** lower left point of local FFT mesh in global FFT mesh coordinates. */ - int start[3]; + std::array start; /** size of new mesh (number of mesh points). */ int new_size; @@ -197,9 +197,9 @@ template struct fft_data_struct { void backward_fft(boost::mpi::communicator const &comm, FloatType *data, bool check_complex); - auto get_mesh_size() const { return forw[3u].new_mesh; } + auto const &get_mesh_size() const { return forw[3u].new_mesh; } - auto get_mesh_start() const { return forw[3u].start; } + auto const &get_mesh_start() const { return forw[3u].start; } private: void forw_grid_comm(boost::mpi::communicator const &comm, @@ -211,43 +211,6 @@ template struct fft_data_struct { FloatType const *in, FloatType *out); }; -/** Pack a block (size[3] starting at start[3]) of an input - * 3d-grid with dimension dim[3] into an output 3d-block with - * dimension size[3]. - * - * The block with dimensions (size[0], size[1], size[2]) is stored - * in 'row-major-order' or 'C-order', that means the first index is - * changing slowest when running through the linear array. The - * element (@c i0 (slow), @c i1 (mid), @c i2 (fast)) has the linear index - * li = i2 + size[2] * (i1 + (size[1] * i0)). - * - * \param[in] in pointer to input 3d-grid. - * \param[out] out pointer to output 3d-grid (block). - * \param[in] start start index of the block in the in-grid. - * \param[in] size size of the block (=dimension of the out-grid). - * \param[in] dim size of the in-grid. - * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex). - */ -template -void fft_pack_block(FloatType const *in, FloatType *out, int const start[3], - int const size[3], int const dim[3], int element); - -/** Unpack a 3d-grid input block (size[3]) into an output 3d-grid - * with dimension dim[3] at start position start[3]. - * - * see also \ref fft_pack_block. - * - * \param[in] in pointer to input 3d-grid. - * \param[out] out pointer to output 3d-grid (block). - * \param[in] start start index of the block in the in-grid. - * \param[in] size size of the block (=dimension of the out-grid). - * \param[in] dim size of the in-grid. - * \param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex). - */ -template -void fft_unpack_block(FloatType const *in, FloatType *out, int const start[3], - int const size[3], int const dim[3], int element); - int map_3don2d_grid(int const g3d[3], int g2d[3]); std::optional> find_comm_groups(Utils::Vector3i const &, diff --git a/src/core/magnetostatics/dipoles.cpp b/src/core/magnetostatics/dipoles.cpp index 5b9e3c98a5..d29a1ac714 100644 --- a/src/core/magnetostatics/dipoles.cpp +++ b/src/core/magnetostatics/dipoles.cpp @@ -91,7 +91,7 @@ double Solver::cutoff() const { #ifdef DP3M if (impl->solver) { if (auto dp3m = get_actor_by_type(impl->solver)) { - return dp3m->dp3m.params.r_cut; + return dp3m->dp3m_params.r_cut; } } #endif diff --git a/src/core/magnetostatics/dlc.cpp b/src/core/magnetostatics/dlc.cpp index e327a11d0f..15d820d3c8 100644 --- a/src/core/magnetostatics/dlc.cpp +++ b/src/core/magnetostatics/dlc.cpp @@ -465,7 +465,7 @@ struct AdaptSolver { #ifdef DP3M void operator()(std::shared_ptr const &solver) { m_actor->prefactor = solver->prefactor; - m_actor->epsilon = solver->dp3m.params.epsilon; + m_actor->epsilon = solver->dp3m_params.epsilon; } #endif diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index 68e409b316..8855a8e4d8 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -76,6 +76,10 @@ #include #include +#ifdef FFTW3_H +#error "The FFTW3 library shouldn't be visible in this translation unit" +#endif + template void DipolarP3MImpl::count_magnetic_particles() { int local_n = 0; @@ -136,8 +140,11 @@ void DipolarP3MImpl::init_cpu_kernels() { assert(dp3m.fft); dp3m.local_mesh.calc_local_ca_mesh(dp3m.params, local_geo, verlet_skin, 0.); - dp3m.fft->init_halo(); - dp3m.fft->init_fft(); + dp3m.fft_buffers->init_halo(); + dp3m.fft->init(dp3m.params); + dp3m.mesh.ks_pnum = dp3m.fft->get_ks_pnum(); + dp3m.fft_buffers->init_meshes(dp3m.fft->get_ca_mesh_size()); + dp3m.update_mesh_views(); dp3m.calc_differential_operator(); /* fix box length dependent constants */ @@ -253,8 +260,11 @@ double DipolarP3MImpl::long_range_kernel( if (dp3m.sum_mu2 > 0.) { dipole_assign(particles); - dp3m.fft->perform_vector_halo_gather(); - dp3m.fft->perform_vector_fwd_fft(); + dp3m.fft_buffers->perform_vector_halo_gather(); + for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) { + dp3m.fft->forward_fft(rs_mesh); + } + dp3m.update_mesh_views(); } /* === k-space energy calculation === */ @@ -323,8 +333,9 @@ double DipolarP3MImpl::long_range_kernel( auto &mesh_dip = dp3m.mesh.rs_fields; auto indices = Utils::Vector3i{}; auto index = std::size_t(0u); + dp3m.ks_scalar.resize(dp3m.mesh.rs_scalar.size()); - /* fill in mesh.ks_scalar array for torque calculation */ + /* fill in ks_scalar array for torque calculation */ auto it_energy = dp3m.g_energy.begin(); index = 0u; for_each_3d(mesh_start, mesh_stop, indices, [&]() { @@ -333,13 +344,13 @@ double DipolarP3MImpl::long_range_kernel( auto const re = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) + mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) + mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]); - dp3m.mesh.ks_scalar[index] = *it_energy * re; + dp3m.ks_scalar[index] = *it_energy * re; ++index; // Im(mu)*k auto const im = mesh_dip[0u][index] * FloatType(d_op[shift[KX]]) + mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) + mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]); - dp3m.mesh.ks_scalar[index] = *it_energy * im; + dp3m.ks_scalar[index] = *it_energy * im; ++index; std::advance(it_energy, 1); }); @@ -349,13 +360,13 @@ double DipolarP3MImpl::long_range_kernel( index = 0u; for_each_3d(mesh_start, mesh_stop, indices, [&]() { auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]); - dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.mesh.ks_scalar[index]; + dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index]; ++index; - dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.mesh.ks_scalar[index]; + dp3m.mesh.rs_scalar[index] = d_op_val * dp3m.ks_scalar[index]; ++index; }); - dp3m.fft->perform_scalar_back_fft(); - dp3m.fft->perform_scalar_halo_spread(); + dp3m.fft->backward_fft(dp3m.fft_buffers->get_scalar_mesh()); + dp3m.fft_buffers->perform_scalar_halo_spread(); /* Assign force component from mesh to particle */ auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3; Utils::integral_parameter( @@ -369,7 +380,7 @@ double DipolarP3MImpl::long_range_kernel( // grids dp3m.mesh.rs_fields ! // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this // number to 6 ! - /* fill in mesh.ks_scalar array for force calculation */ + /* fill in ks_scalar array for force calculation */ auto it_force = dp3m.g_force.begin(); index = 0u; for_each_3d(mesh_start, mesh_stop, indices, [&]() { @@ -384,8 +395,8 @@ double DipolarP3MImpl::long_range_kernel( mesh_dip[1u][index] * FloatType(d_op[shift[KY]]) + mesh_dip[2u][index] * FloatType(d_op[shift[KZ]]); ++index; - dp3m.mesh.ks_scalar[index - 2] = *it_force * im; - dp3m.mesh.ks_scalar[index - 1] = *it_force * (-re); + dp3m.ks_scalar[index - 2] = *it_force * im; + dp3m.ks_scalar[index - 1] = *it_force * (-re); std::advance(it_force, 1); }); @@ -395,19 +406,21 @@ double DipolarP3MImpl::long_range_kernel( for_each_3d(mesh_start, mesh_stop, indices, [&]() { auto const d_op_val = FloatType(d_op[indices[d] + offset[d]]); auto const shift = indices + offset; - auto const f1 = d_op_val * dp3m.mesh.ks_scalar[index]; + auto const f1 = d_op_val * dp3m.ks_scalar[index]; mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f1; mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f1; mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f1; ++index; - auto const f2 = d_op_val * dp3m.mesh.ks_scalar[index]; + auto const f2 = d_op_val * dp3m.ks_scalar[index]; mesh_dip[0u][index] = FloatType(d_op[shift[KX]]) * f2; mesh_dip[1u][index] = FloatType(d_op[shift[KY]]) * f2; mesh_dip[2u][index] = FloatType(d_op[shift[KZ]]) * f2; ++index; }); - dp3m.fft->perform_vector_back_fft(); - dp3m.fft->perform_vector_halo_spread(); + for (auto &rs_mesh : dp3m.fft_buffers->get_vector_mesh()) { + dp3m.fft->backward_fft(rs_mesh); + } + dp3m.fft_buffers->perform_vector_halo_spread(); /* Assign force component from mesh to particle */ auto const d_rs = (d + dp3m.mesh.ks_pnum) % 3; Utils::integral_parameter( @@ -435,8 +448,9 @@ double DipolarP3MImpl::long_range_kernel( return energy; } -double DipolarP3M::calc_surface_term(bool force_flag, bool energy_flag, - ParticleRange const &particles) { +template +double DipolarP3MImpl::calc_surface_term( + bool force_flag, bool energy_flag, ParticleRange const &particles) { auto const &box_geo = *get_system().box_geo; auto const pref = prefactor * 4. * std::numbers::pi / box_geo.volume() / (2. * dp3m.params.epsilon + 1.); @@ -811,15 +825,15 @@ void DipolarP3M::sanity_checks_boxl() const { auto const &local_geo = *system.local_geo; for (auto i = 0u; i < 3u; i++) { /* check k-space cutoff */ - if (dp3m.params.cao_cut[i] >= box_geo.length_half()[i]) { + if (dp3m_params.cao_cut[i] >= box_geo.length_half()[i]) { std::stringstream msg; - msg << "dipolar P3M_init: k-space cutoff " << dp3m.params.cao_cut[i] + msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i] << " is larger than half of box dimension " << box_geo.length()[i]; throw std::runtime_error(msg.str()); } - if (dp3m.params.cao_cut[i] >= local_geo.length()[i]) { + if (dp3m_params.cao_cut[i] >= local_geo.length()[i]) { std::stringstream msg; - msg << "dipolar P3M_init: k-space cutoff " << dp3m.params.cao_cut[i] + msg << "dipolar P3M_init: k-space cutoff " << dp3m_params.cao_cut[i] << " is larger than local box dimension " << local_geo.length()[i]; throw std::runtime_error(msg.str()); } @@ -862,7 +876,8 @@ void DipolarP3M::sanity_checks_node_grid() const { } } -void DipolarP3M::scaleby_box_l() { +template +void DipolarP3MImpl::scaleby_box_l() { auto const &box_geo = *get_system().box_geo; dp3m.params.r_cut = dp3m.params.r_cut_iL * box_geo.length()[0]; dp3m.params.alpha = dp3m.params.alpha_L * box_geo.length_inv()[0]; @@ -871,6 +886,7 @@ void DipolarP3M::scaleby_box_l() { sanity_checks_boxl(); calc_influence_function_force(); calc_influence_function_energy(); + dp3m.energy_correction = 0.; } template diff --git a/src/core/magnetostatics/dp3m.hpp b/src/core/magnetostatics/dp3m.hpp index 6909b44caf..a01624c489 100644 --- a/src/core/magnetostatics/dp3m.hpp +++ b/src/core/magnetostatics/dp3m.hpp @@ -49,7 +49,6 @@ #include #include -#include #ifdef NPT /** Update the NpT virial */ @@ -58,36 +57,16 @@ void npt_add_virial_magnetic_contribution(double energy); /** @brief Dipolar P3M solver. */ struct DipolarP3M : public Dipoles::Actor { - /** @brief Dipolar P3M parameters. */ - p3m_data_struct &dp3m; - - int tune_timings; - bool tune_verbose; - -protected: - bool m_is_tuned; + P3MParameters const &dp3m_params; public: - DipolarP3M(p3m_data_struct &dp3m_data, double prefactor, int tune_timings, - bool tune_verbose) - : dp3m{dp3m_data}, tune_timings{tune_timings}, - tune_verbose{tune_verbose} { - - if (tune_timings <= 0) { - throw std::domain_error("Parameter 'timings' must be > 0"); - } - if (dp3m.params.mesh != Utils::Vector3i::broadcast(dp3m.params.mesh[0])) { - throw std::domain_error("DipolarP3M requires a cubic mesh"); - } - m_is_tuned = not dp3m.params.tuning; - dp3m.params.tuning = false; - set_prefactor(prefactor); - } + DipolarP3M(P3MParameters const &dp3m_params) : dp3m_params{dp3m_params} {} virtual ~DipolarP3M() = default; - [[nodiscard]] bool is_tuned() const { return m_is_tuned; } + [[nodiscard]] virtual bool is_tuned() const noexcept = 0; [[nodiscard]] virtual bool is_gpu() const noexcept = 0; + [[nodiscard]] virtual bool is_double_precision() const noexcept = 0; virtual void on_activation() = 0; /** @brief Recalculate all box-length-dependent parameters. */ @@ -126,7 +105,7 @@ struct DipolarP3M : public Dipoles::Actor { * @ref P3MParameters::r_cut_iL "r_cut_iL" and * @ref P3MParameters::alpha_L "alpha_L" are tuned to obtain the target * @ref P3MParameters::accuracy "accuracy" in optimal time. - * These parameters are stored in the @ref dp3m object. + * These parameters are stored in the @ref dp3m_params object. * * The function utilizes the analytic expression of the error estimate * for the dipolar P3M method in the paper of @cite cerda08d in @@ -163,14 +142,14 @@ struct DipolarP3M : public Dipoles::Actor { inline ParticleForce pair_force(Particle const &p1, Particle const &p2, Utils::Vector3d const &d, double dist2, double dist) const { - if ((p1.dipm() == 0.) || (p2.dipm() == 0.) || dist >= dp3m.params.r_cut || + if ((p1.dipm() == 0.) || (p2.dipm() == 0.) || dist >= dp3m_params.r_cut || dist <= 0.) return {}; auto const dip1 = p1.calc_dip(); auto const dip2 = p2.calc_dip(); - auto const alpsq = dp3m.params.alpha * dp3m.params.alpha; - auto const adist = dp3m.params.alpha * dist; + auto const alpsq = dp3m_params.alpha * dp3m_params.alpha; + auto const adist = dp3m_params.alpha * dist; #if USE_ERFC_APPROXIMATION auto const erfc_part_ri = Utils::AS_erfc_part(adist) / dist; #else @@ -183,11 +162,11 @@ struct DipolarP3M : public Dipoles::Actor { auto const mir = dip1 * d; auto const mjr = dip2 * d; - auto const coeff = 2. * dp3m.params.alpha * std::numbers::inv_sqrtpi; + auto const coeff = 2. * dp3m_params.alpha * std::numbers::inv_sqrtpi; auto const dist2i = 1. / dist2; auto const exp_adist2 = exp(-Utils::sqr(adist)); - auto const B_r = (dp3m.params.accuracy > 5e-06) + auto const B_r = (dp3m_params.accuracy > 5e-06) ? (erfc_part_ri + coeff) * exp_adist2 * dist2i : (erfc(adist) / dist + coeff * exp_adist2) * dist2i; @@ -221,15 +200,15 @@ struct DipolarP3M : public Dipoles::Actor { inline double pair_energy(Particle const &p1, Particle const &p2, Utils::Vector3d const &d, double dist2, double dist) const { - if ((p1.dipm() == 0.) || (p2.dipm() == 0.) || dist >= dp3m.params.r_cut || + if ((p1.dipm() == 0.) || (p2.dipm() == 0.) || dist >= dp3m_params.r_cut || dist <= 0.) return {}; auto const dip1 = p1.calc_dip(); auto const dip2 = p2.calc_dip(); - auto const alpsq = dp3m.params.alpha * dp3m.params.alpha; - auto const adist = dp3m.params.alpha * dist; + auto const alpsq = dp3m_params.alpha * dp3m_params.alpha; + auto const adist = dp3m_params.alpha * dist; #if USE_ERFC_APPROXIMATION auto const erfc_part_ri = Utils::AS_erfc_part(adist) / dist; @@ -242,11 +221,11 @@ struct DipolarP3M : public Dipoles::Actor { auto const mir = dip1 * d; auto const mjr = dip2 * d; - auto const coeff = 2. * dp3m.params.alpha * std::numbers::inv_sqrtpi; + auto const coeff = 2. * dp3m_params.alpha * std::numbers::inv_sqrtpi; auto const dist2i = 1. / dist2; auto const exp_adist2 = exp(-Utils::sqr(adist)); - auto const B_r = (dp3m.params.accuracy > 5e-06) + auto const B_r = (dp3m_params.accuracy > 5e-06) ? dist2i * (erfc_part_ri + coeff) * exp_adist2 : dist2i * (erfc(adist) / dist + coeff * exp_adist2); auto const C_r = (3. * B_r + 2. * alpsq * coeff * exp_adist2) * dist2i; @@ -270,8 +249,8 @@ struct DipolarP3M : public Dipoles::Actor { virtual void calc_influence_function_energy() = 0; /** Compute the dipolar surface terms */ - double calc_surface_term(bool force_flag, bool energy_flag, - ParticleRange const &particles); + virtual double calc_surface_term(bool force_flag, bool energy_flag, + ParticleRange const &particles) = 0; /** Checks for correctness of the k-space cutoff. */ void sanity_checks_boxl() const; @@ -279,7 +258,7 @@ struct DipolarP3M : public Dipoles::Actor { void sanity_checks_periodicity() const; void sanity_checks_cell_structure() const; - virtual void scaleby_box_l(); + virtual void scaleby_box_l() = 0; }; #endif // DP3M diff --git a/src/core/magnetostatics/dp3m.impl.hpp b/src/core/magnetostatics/dp3m.impl.hpp index 07874460e3..ee50a2bd9d 100644 --- a/src/core/magnetostatics/dp3m.impl.hpp +++ b/src/core/magnetostatics/dp3m.impl.hpp @@ -19,16 +19,6 @@ * along with this program. If not, see . */ -/** @file - * P3M algorithm for long-range magnetic dipole-dipole interaction. - * - * We use here a P3M (Particle-Particle Particle-Mesh) method based - * on the dipolar Ewald summation. Details of the used method can be - * found in @cite hockney88a and @cite deserno98a @cite deserno98b. - * - * Further reading: @cite cerda08d - */ - #pragma once #include "config/config.hpp" @@ -41,18 +31,19 @@ #include "p3m/data_struct.hpp" #include "p3m/interpolation.hpp" -#include "Particle.hpp" #include "ParticleRange.hpp" #include -#include #include +#include +#include #include +#include template -struct p3m_data_struct_dipoles : public p3m_data_struct_fft { - using p3m_data_struct_fft::p3m_data_struct_fft; +struct p3m_data_struct_dipoles : public p3m_data_struct { + using p3m_data_struct::p3m_data_struct; /** number of dipolar particles (only on head node). */ int sum_dip_part = 0; @@ -64,6 +55,8 @@ struct p3m_data_struct_dipoles : public p3m_data_struct_fft { /** cached k-space self-energy correction */ double energy_correction = 0.; + /** k-space scalar mesh for k-space calculations. */ + std::vector ks_scalar; p3m_interpolation_cache inter_weights; }; @@ -72,17 +65,34 @@ template struct DipolarP3MImpl : public DipolarP3M { ~DipolarP3MImpl() override = default; + /** @brief Dipolar P3M parameters. */ + p3m_data_struct_dipoles &dp3m; + +private: /** @brief Coulomb P3M meshes and FFT algorithm. */ std::unique_ptr> dp3m_impl; - // this member overshadows its own base class pointer in the parent class - p3m_data_struct_dipoles &dp3m; + int tune_timings; + bool tune_verbose; + bool m_is_tuned; - template +public: DipolarP3MImpl( std::unique_ptr> &&dp3m_handle, - Args &&...args) - : DipolarP3M(*dp3m_handle, std::forward(args)...), - dp3m_impl{std::move(dp3m_handle)}, dp3m{*dp3m_impl} {} + double prefactor, int tune_timings, bool tune_verbose) + : DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle}, + dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings}, + tune_verbose{tune_verbose} { + + if (tune_timings <= 0) { + throw std::domain_error("Parameter 'timings' must be > 0"); + } + if (dp3m.params.mesh != Utils::Vector3i::broadcast(dp3m.params.mesh[0])) { + throw std::domain_error("DipolarP3M requires a cubic mesh"); + } + m_is_tuned = not dp3m.params.tuning; + dp3m.params.tuning = false; + set_prefactor(prefactor); + } void init() override { if constexpr (Architecture == Arch::CPU) { @@ -92,9 +102,13 @@ struct DipolarP3MImpl : public DipolarP3M { void tune() override; void count_magnetic_particles() override; + [[nodiscard]] bool is_tuned() const noexcept override { return m_is_tuned; } [[nodiscard]] bool is_gpu() const noexcept override { return Architecture == Arch::GPU; } + [[nodiscard]] bool is_double_precision() const noexcept override { + return std::is_same_v; + } void on_activation() override { sanity_checks(); @@ -121,21 +135,22 @@ struct DipolarP3MImpl : public DipolarP3M { void calc_energy_correction() override; void calc_influence_function_force() override; void calc_influence_function_energy() override; + double calc_surface_term(bool force_flag, bool energy_flag, + ParticleRange const &particles) override; void init_cpu_kernels(); - void scaleby_box_l() override { - DipolarP3M::scaleby_box_l(); - dp3m.energy_correction = 0.; - } + void scaleby_box_l() override; }; template class FFTBackendImpl, class... Args> + template class FFTBackendImpl, + template class P3MFFTMeshImpl, class... Args> std::shared_ptr new_dp3m_handle(P3MParameters &&p3m, Args &&...args) { auto obj = std::make_shared>( std::make_unique>(std::move(p3m)), std::forward(args)...); - obj->dp3m.template make_fft_instance>(true); + obj->dp3m.template make_mesh_instance>(); + obj->dp3m.template make_fft_instance>(); return obj; } diff --git a/src/core/p3m/CMakeLists.txt b/src/core/p3m/CMakeLists.txt index 2a6dd407ba..a32b76fd5d 100644 --- a/src/core/p3m/CMakeLists.txt +++ b/src/core/p3m/CMakeLists.txt @@ -17,5 +17,6 @@ # along with this program. If not, see . # -target_sources(espresso_core PRIVATE common.cpp send_mesh.cpp - TuningAlgorithm.cpp FFTBackendLegacy.cpp) +target_sources( + espresso_core PRIVATE common.cpp send_mesh.cpp TuningAlgorithm.cpp + FFTBackendLegacy.cpp FFTBuffersLegacy.cpp) diff --git a/src/core/p3m/FFTBackendLegacy.cpp b/src/core/p3m/FFTBackendLegacy.cpp index c00f3440c8..b2cce8ea10 100644 --- a/src/core/p3m/FFTBackendLegacy.cpp +++ b/src/core/p3m/FFTBackendLegacy.cpp @@ -29,17 +29,11 @@ #include "fft/fft.hpp" -#include - -#include #include -#include -#include template -FFTBackendLegacy::FFTBackendLegacy( - p3m_data_struct_fft &obj, bool dipolar) - : FFTBackend(obj), dipolar{dipolar}, +FFTBackendLegacy::FFTBackendLegacy(P3MLocalMesh const &local_mesh) + : FFTBackend(local_mesh), fft{std::make_unique>( ::Communication::mpiCallbacksHandle()->share_mpi_env())} {} @@ -47,90 +41,20 @@ template FFTBackendLegacy::~FFTBackendLegacy() = default; template -void FFTBackendLegacy::update_mesh_data() { - auto const mesh_size_ptr = fft->get_mesh_size(); - auto const mesh_start_ptr = fft->get_mesh_start(); - for (auto i = 0u; i < 3u; ++i) { - mesh.size[i] = mesh_size_ptr[i]; - mesh.start[i] = mesh_start_ptr[i]; - } - mesh.stop = mesh.start + mesh.size; - mesh.ks_scalar = std::span(ks_mesh); - mesh.rs_scalar = std::span(rs_mesh); - for (auto i = 0u; i < 3u; ++i) { - mesh.rs_fields[i] = std::span(rs_mesh_fields[i]); - } -} - -template void FFTBackendLegacy::init_halo() { - mesh_comm.resize(::comm_cart, local_mesh); -} - -template void FFTBackendLegacy::init_fft() { - auto ca_mesh_size = fft->initialize_fft( +void FFTBackendLegacy::init(P3MParameters const ¶ms) { + ca_mesh_size = fft->initialize_fft( ::comm_cart, local_mesh.dim, local_mesh.margin, params.mesh, - params.mesh_off, mesh.ks_pnum, ::communicator.node_grid); - rs_mesh.resize(ca_mesh_size); - if (dipolar) { - ks_mesh.resize(ca_mesh_size); - } - for (auto &rs_mesh_field : rs_mesh_fields) { - rs_mesh_field.resize(ca_mesh_size); - } - update_mesh_data(); -} - -template -void FFTBackendLegacy::perform_vector_back_fft() { - for (auto &rs_mesh_field : rs_mesh_fields) { - fft->backward_fft(::comm_cart, rs_mesh_field.data(), - check_complex_residuals); - } -} - -template -void FFTBackendLegacy::perform_vector_halo_spread() { - std::array meshes = {{rs_mesh_fields[0u].data(), - rs_mesh_fields[1u].data(), - rs_mesh_fields[2u].data()}}; - mesh_comm.spread_grid(::comm_cart, meshes, local_mesh.dim); -} - -template -void FFTBackendLegacy::perform_scalar_halo_gather() { - mesh_comm.gather_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); -} - -template -void FFTBackendLegacy::perform_scalar_fwd_fft() { - fft->forward_fft(::comm_cart, rs_mesh.data()); - update_mesh_data(); -} - -template -void FFTBackendLegacy::perform_vector_halo_gather() { - std::array meshes = {{rs_mesh_fields[0u].data(), - rs_mesh_fields[1u].data(), - rs_mesh_fields[2u].data()}}; - mesh_comm.gather_grid(::comm_cart, meshes, local_mesh.dim); -} - -template -void FFTBackendLegacy::perform_vector_fwd_fft() { - for (auto &rs_mesh_field : rs_mesh_fields) { - fft->forward_fft(::comm_cart, rs_mesh_field.data()); - } - update_mesh_data(); + params.mesh_off, ks_pnum, ::communicator.node_grid); } template -void FFTBackendLegacy::perform_scalar_back_fft() { - fft->backward_fft(::comm_cart, rs_mesh.data(), check_complex_residuals); +void FFTBackendLegacy::forward_fft(FloatType *rs_mesh) { + fft->forward_fft(::comm_cart, rs_mesh); } template -void FFTBackendLegacy::perform_scalar_halo_spread() { - mesh_comm.spread_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); +void FFTBackendLegacy::backward_fft(FloatType *rs_mesh) { + fft->backward_fft(::comm_cart, rs_mesh, check_complex_residuals); } template class FFTBackendLegacy; diff --git a/src/core/p3m/FFTBackendLegacy.hpp b/src/core/p3m/FFTBackendLegacy.hpp index bf2dd92f7b..fdffe0c718 100644 --- a/src/core/p3m/FFTBackendLegacy.hpp +++ b/src/core/p3m/FFTBackendLegacy.hpp @@ -27,9 +27,6 @@ #include "common.hpp" #include "data_struct.hpp" -#include "send_mesh.hpp" - -#include "fft/vector.hpp" #include #include @@ -49,34 +46,26 @@ class FFTBackendLegacy : public FFTBackend { static_assert(std::is_same_v or std::is_same_v, "FFTW only implements float and double"); - bool dipolar; std::unique_ptr> fft; - /** @brief k-space mesh (local) for k-space calculations. */ - std::vector ks_mesh; - /** @brief real-space mesh (local) for CA/FFT. */ - fft::vector rs_mesh; - /** @brief real-space mesh (local) for the electric or dipolar field. */ - std::array, 3> rs_mesh_fields; - p3m_send_mesh mesh_comm; - using FFTBackend::params; - using FFTBackend::mesh; using FFTBackend::local_mesh; using FFTBackend::check_complex_residuals; + int ca_mesh_size = -1; + int ks_pnum = -1; public: - FFTBackendLegacy(p3m_data_struct_fft &obj, bool dipolar); + FFTBackendLegacy(P3MLocalMesh const &local_mesh); ~FFTBackendLegacy() override; - void init_fft() override; - void init_halo() override; - void perform_scalar_fwd_fft() override; - void perform_vector_fwd_fft() override; - void perform_scalar_back_fft() override; - void perform_vector_back_fft() override; - void perform_scalar_halo_gather() override; - void perform_vector_halo_gather() override; - void perform_scalar_halo_spread() override; - void perform_vector_halo_spread() override; - void update_mesh_data(); + void init(P3MParameters const ¶ms) override; + void forward_fft(FloatType *rs_mesh) override; + void backward_fft(FloatType *rs_mesh) override; + int get_ca_mesh_size() const noexcept override { return ca_mesh_size; } + int get_ks_pnum() const noexcept override { return ks_pnum; } + std::array const &get_mesh_size() const override { + return fft->get_mesh_size(); + } + std::array const &get_mesh_start() const override { + return fft->get_mesh_start(); + } /** * @brief Index helpers for reciprocal space. diff --git a/src/core/p3m/FFTBuffersLegacy.cpp b/src/core/p3m/FFTBuffersLegacy.cpp new file mode 100644 index 0000000000..608037f8e2 --- /dev/null +++ b/src/core/p3m/FFTBuffersLegacy.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "config/config.hpp" + +#if defined(P3M) or defined(DP3M) + +#include "FFTBuffersLegacy.hpp" + +#include "communication.hpp" + +#include +#include + +template +FFTBuffersLegacy::~FFTBuffersLegacy() = default; + +template +void FFTBuffersLegacy::update_mesh_views( + P3MFFTMesh &out) { + out.rs_scalar = std::span(rs_mesh); + for (auto i = 0u; i < 3u; ++i) { + out.rs_fields[i] = std::span(rs_mesh_fields[i]); + } +} + +template void FFTBuffersLegacy::init_halo() { + mesh_comm.resize(::comm_cart, local_mesh); +} + +template +void FFTBuffersLegacy::init_meshes(int ca_mesh_size) { + rs_mesh.resize(ca_mesh_size); + for (auto &rs_mesh_field : rs_mesh_fields) { + rs_mesh_field.resize(ca_mesh_size); + } +} + +template +void FFTBuffersLegacy::perform_vector_halo_spread() { + std::array meshes = {{rs_mesh_fields[0u].data(), + rs_mesh_fields[1u].data(), + rs_mesh_fields[2u].data()}}; + mesh_comm.spread_grid(::comm_cart, meshes, local_mesh.dim); +} + +template +void FFTBuffersLegacy::perform_scalar_halo_gather() { + mesh_comm.gather_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); +} + +template +void FFTBuffersLegacy::perform_vector_halo_gather() { + std::array meshes = {{rs_mesh_fields[0u].data(), + rs_mesh_fields[1u].data(), + rs_mesh_fields[2u].data()}}; + mesh_comm.gather_grid(::comm_cart, meshes, local_mesh.dim); +} + +template +void FFTBuffersLegacy::perform_scalar_halo_spread() { + mesh_comm.spread_grid(::comm_cart, rs_mesh.data(), local_mesh.dim); +} + +template +FloatType *FFTBuffersLegacy::get_scalar_mesh() { + return rs_mesh.data(); +} + +template +std::array FFTBuffersLegacy::get_vector_mesh() { + return {rs_mesh_fields[0u].data(), rs_mesh_fields[1u].data(), + rs_mesh_fields[2u].data()}; +} + +template class FFTBuffersLegacy; +template class FFTBuffersLegacy; + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/FFTBuffersLegacy.hpp b/src/core/p3m/FFTBuffersLegacy.hpp new file mode 100644 index 0000000000..130515cff1 --- /dev/null +++ b/src/core/p3m/FFTBuffersLegacy.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include "config/config.hpp" + +#if defined(P3M) or defined(DP3M) + +#include "common.hpp" +#include "data_struct.hpp" +#include "send_mesh.hpp" + +#include "fft/vector.hpp" + +#include +#include + +/** @brief Buffers for @ref FFTBackendLegacy. */ +template +class FFTBuffersLegacy : public FFTBuffers { + static_assert(std::is_same_v or + std::is_same_v, + "FFTW only implements float and double"); + using FFTBuffers::FFTBuffers; + using FFTBuffers::local_mesh; + p3m_send_mesh mesh_comm; + /** @brief real-space mesh (local) for CA/FFT. */ + fft::vector rs_mesh; + /** @brief real-space mesh (local) for the electric or dipolar field. */ + std::array, 3u> rs_mesh_fields; + +public: + ~FFTBuffersLegacy() override; + void init_halo() override; + void init_meshes(int ca_mesh_size) override; + void perform_scalar_halo_gather() override; + void perform_vector_halo_gather() override; + void perform_scalar_halo_spread() override; + void perform_vector_halo_spread() override; + void update_mesh_views(P3MFFTMesh &out) override; + FloatType *get_scalar_mesh() override; + std::array get_vector_mesh() override; +}; + +#endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp index 4a4b88b22f..3cb9c1cb45 100644 --- a/src/core/p3m/common.hpp +++ b/src/core/p3m/common.hpp @@ -219,11 +219,9 @@ struct P3MLocalMesh { /** @brief Local mesh FFT buffers. */ template struct P3MFFTMesh { - /** @brief k-space scalar mesh for k-space calculations. */ - std::span ks_scalar; /** @brief real-space scalar mesh for charge assignment and FFT. */ std::span rs_scalar; - /** @brief real-space 3D meshes for the electric or dipolar field. */ + /** @brief real-space vector meshes for the electric or dipolar field. */ std::array, 3> rs_fields; /** @brief Indices of the lower left corner of the local mesh grid. */ diff --git a/src/core/p3m/data_struct.hpp b/src/core/p3m/data_struct.hpp index 7669a5b359..9bd81ad7c2 100644 --- a/src/core/p3m/data_struct.hpp +++ b/src/core/p3m/data_struct.hpp @@ -35,13 +35,16 @@ #include template class FFTBackend; +template class FFTBuffers; /** * @brief Base class for the electrostatics and magnetostatics P3M algorithms. * Contains a handle to the FFT backend, information about the local mesh, * the differential operator, and various buffers. */ -struct p3m_data_struct { +template struct p3m_data_struct { + using value_type = FloatType; + explicit p3m_data_struct(P3MParameters &¶meters) : params{std::move(parameters)} {} @@ -49,6 +52,8 @@ struct p3m_data_struct { P3MParameters params; /** @brief Local mesh properties. */ P3MLocalMesh local_mesh; + /** @brief Local mesh FFT buffers. */ + P3MFFTMesh mesh; /** * @brief Spatial differential operator in k-space. @@ -63,57 +68,81 @@ struct p3m_data_struct { void calc_differential_operator() { d_op = detail::calc_meshift(params.mesh, true); } -}; - -template -struct p3m_data_struct_fft : public p3m_data_struct { - using p3m_data_struct::p3m_data_struct; - using value_type = FloatType; - /** @brief Local mesh FFT buffers. */ - P3MFFTMesh mesh; - /** Force optimised influence function (k-space) */ + /** @brief Force optimised influence function (k-space) */ std::vector g_force; - /** Energy optimised influence function (k-space) */ + /** @brief Energy optimised influence function (k-space) */ std::vector g_energy; - /** FFT backend. */ + /** @brief FFT algorithm. */ std::unique_ptr> fft; + /** @brief FFT buffers. */ + std::unique_ptr> fft_buffers; + + void init(); + + void update_mesh_views() { + auto const mesh_size_ptr = fft->get_mesh_size(); + auto const mesh_start_ptr = fft->get_mesh_start(); + for (auto i = 0u; i < 3u; ++i) { + mesh.size[i] = mesh_size_ptr[i]; + mesh.start[i] = mesh_start_ptr[i]; + } + mesh.stop = mesh.start + mesh.size; + fft_buffers->update_mesh_views(mesh); + } template void make_fft_instance(Args... args) { assert(fft == nullptr); - fft = std::make_unique(*this, args...); + fft = std::make_unique(std::as_const(local_mesh), args...); + } + + template void make_mesh_instance(Args... args) { + assert(fft_buffers == nullptr); + fft_buffers = std::make_unique(std::as_const(local_mesh), args...); } }; /** * @brief API for the FFT backend of the P3M algorithm. - * Any FFT backend must implement this interface. - * The backend can read some members of @ref p3m_data_struct - * but can only modify the FFT buffers in @ref P3MFFTMesh. */ template class FFTBackend { protected: - P3MParameters const ¶ms; P3MLocalMesh const &local_mesh; - P3MFFTMesh &mesh; public: bool check_complex_residuals = false; - explicit FFTBackend(p3m_data_struct_fft &obj) - : params{obj.params}, local_mesh{obj.local_mesh}, mesh{obj.mesh} {} + explicit FFTBackend(P3MLocalMesh const &local_mesh) + : local_mesh{local_mesh} {} virtual ~FFTBackend() = default; - /** @brief Initialize the FFT plans and buffers. */ - virtual void init_fft() = 0; - /** @brief Initialize the halo buffers. */ - virtual void init_halo() = 0; + virtual void init(P3MParameters const ¶ms) = 0; + virtual int get_ca_mesh_size() const noexcept = 0; + virtual int get_ks_pnum() const noexcept = 0; /** @brief Carry out the forward FFT of the scalar mesh. */ - virtual void perform_scalar_fwd_fft() = 0; - /** @brief Carry out the forward FFT of the vector meshes. */ - virtual void perform_vector_fwd_fft() = 0; + virtual void forward_fft(FloatType *rs_mesh) = 0; /** @brief Carry out the backward FFT of the scalar mesh. */ - virtual void perform_scalar_back_fft() = 0; - /** @brief Carry out the backward FFT of the vector meshes. */ - virtual void perform_vector_back_fft() = 0; + virtual void backward_fft(FloatType *rs_mesh) = 0; + /** @brief Get indices of the k-space data layout. */ + virtual std::tuple get_permutations() const = 0; + virtual std::array const &get_mesh_size() const = 0; + virtual std::array const &get_mesh_start() const = 0; +}; + +/** + * @brief API for the FFT mesh buffers. + */ +template class FFTBuffers { +protected: + P3MLocalMesh const &local_mesh; + +public: + bool check_complex_residuals = false; + explicit FFTBuffers(P3MLocalMesh const &local_mesh) + : local_mesh{local_mesh} {} + virtual ~FFTBuffers() = default; + /** @brief Initialize the meshes. */ + virtual void init_meshes(int ca_mesh_size) = 0; + /** @brief Initialize the halo buffers. */ + virtual void init_halo() = 0; /** @brief Update scalar mesh halo with data from neighbors (accumulation). */ virtual void perform_scalar_halo_gather() = 0; /** @brief Update vector mesh halo with data from neighbors (accumulation). */ @@ -122,8 +151,21 @@ template class FFTBackend { virtual void perform_scalar_halo_spread() = 0; /** @brief Update vector mesh halo of all neighbors. */ virtual void perform_vector_halo_spread() = 0; - /** @brief Get indices of the k-space data layout. */ - virtual std::tuple get_permutations() const = 0; + /** + * @brief Get pointer to scalar mesh begin. + * Should only be used by @ref FFTBackend. + */ + virtual FloatType *get_scalar_mesh() = 0; + /** + * @brief Get pointer to vector mesh begin. + * Should only be used by @ref FFTBackend. + */ + virtual std::array get_vector_mesh() = 0; + /** + * @brief Update the scalar and vector mesh views in @ref P3MFFTMesh + * to point to the new underlying data structures. + */ + virtual void update_mesh_views(P3MFFTMesh &out) = 0; }; #endif // defined(P3M) or defined(DP3M) diff --git a/src/core/p3m/packing.hpp b/src/core/p3m/packing.hpp new file mode 100644 index 0000000000..19f131a58c --- /dev/null +++ b/src/core/p3m/packing.hpp @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include +#include + +/** + * @brief Pack a 3D-block of size @c size starting at @c start of an input + * 3D-grid @c in with dimension @c dim into an output 3D-block @c out. + * + * The block is stored in 'row-major-order' or 'C-order', that means + * the first index is changing slowest when running through the linear array. + * The element (@c i0 (slow), @c i1 (mid), @c i2 (fast)) has the linear + * index li = i2 + size[2] * (i1 + (size[1] * i0)). + * + * @param[in] in pointer to input 3D-grid. + * @param[out] out pointer to output 3D-block. + * @param[in] start start index of the block in the input 3D-grid. + * @param[in] size size of the output 3D-block. + * @param[in] dim size of the input 3D-grid. + * @param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex). + */ +template +void fft_pack_block(FloatType const *const in, FloatType *const out, + int const *start, int const *size, int const *dim, + int element) { + + auto const copy_size = + static_cast(element * size[2]) * sizeof(FloatType); + /* offsets for indices in input grid */ + auto const m_in_offset = element * dim[2]; + auto const s_in_offset = element * (dim[2] * (dim[1] - size[1])); + /* offsets for indices in output grid */ + auto const m_out_offset = element * size[2]; + /* linear index of input grid, linear index of output grid */ + int li_in = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0])); + int li_out = 0; + + for (int s = 0; s < size[0]; s++) { + for (int m = 0; m < size[1]; m++) { + memmove(&(out[li_out]), &(in[li_in]), copy_size); + li_in += m_in_offset; + li_out += m_out_offset; + } + li_in += s_in_offset; + } +} + +/** + * @brief Unpack a 3D-block @c in of size @c size into an output + * 3D-grid @c out of size @c dim starting at position @c start. + * + * See also @ref fft_pack_block. + * + * @param[in] in pointer to input 3D-block. + * @param[out] out pointer to output 3D-grid. + * @param[in] start start index of the block in the input 3D-grid. + * @param[in] size size of the input 3D-block. + * @param[in] dim size of the output 3D-grid. + * @param[in] element size of a grid element (e.g. 1 for Real, 2 for Complex). + */ +template +void fft_unpack_block(FloatType const *const in, FloatType *const out, + int const *start, int const *size, int const *dim, + int element) { + + auto const copy_size = + static_cast(element * size[2]) * sizeof(FloatType); + /* offsets for indices in output grid */ + auto const m_out_offset = element * dim[2]; + auto const s_out_offset = element * (dim[2] * (dim[1] - size[1])); + /* offset for indices in input grid */ + auto const m_in_offset = element * size[2]; + /* linear index of in grid, linear index of out grid */ + int li_in = 0; + int li_out = element * (start[2] + dim[2] * (start[1] + dim[1] * start[0])); + + for (int s = 0; s < size[0]; s++) { + for (int m = 0; m < size[1]; m++) { + memmove(&(out[li_out]), &(in[li_in]), copy_size); + li_in += m_in_offset; + li_out += m_out_offset; + } + li_out += s_out_offset; + } +} diff --git a/src/core/p3m/send_mesh.cpp b/src/core/p3m/send_mesh.cpp index 4707936196..ee711eae3e 100644 --- a/src/core/p3m/send_mesh.cpp +++ b/src/core/p3m/send_mesh.cpp @@ -25,6 +25,7 @@ #include "fft/fft.hpp" #include "p3m/common.hpp" +#include "p3m/packing.hpp" #include "p3m/send_mesh.hpp" #include @@ -174,8 +175,8 @@ void p3m_send_mesh::gather_grid(boost::mpi::communicator const &comm, /* pack send block */ if (s_size[s_dir] > 0) { for (std::size_t i = 0; i < meshes.size(); i++) { - fft::fft_pack_block(meshes[i], send_grid.data() + i * s_size[s_dir], - s_ld[s_dir], s_dim[s_dir], dim.data(), 1); + fft_pack_block(meshes[i], send_grid.data() + i * s_size[s_dir], + s_ld[s_dir], s_dim[s_dir], dim.data(), 1); } } @@ -214,8 +215,8 @@ void p3m_send_mesh::spread_grid(boost::mpi::communicator const &comm, /* pack send block */ if (r_size[r_dir] > 0) { for (std::size_t i = 0; i < meshes.size(); i++) { - fft::fft_pack_block(meshes[i], send_grid.data() + i * r_size[r_dir], - r_ld[r_dir], r_dim[r_dir], dim.data(), 1); + fft_pack_block(meshes[i], send_grid.data() + i * r_size[r_dir], + r_ld[r_dir], r_dim[r_dir], dim.data(), 1); } } /* communication */ @@ -231,8 +232,8 @@ void p3m_send_mesh::spread_grid(boost::mpi::communicator const &comm, /* unpack recv block */ if (s_size[s_dir] > 0) { for (std::size_t i = 0; i < meshes.size(); i++) { - fft::fft_unpack_block(recv_grid.data() + i * s_size[s_dir], meshes[i], - s_ld[s_dir], s_dim[s_dir], dim.data(), 1); + fft_unpack_block(recv_grid.data() + i * s_size[s_dir], meshes[i], + s_ld[s_dir], s_dim[s_dir], dim.data(), 1); } } } diff --git a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp index c2f4a9d125..58a9bba30e 100644 --- a/src/core/unit_tests/EspressoSystemStandAlone_test.cpp +++ b/src/core/unit_tests/EspressoSystemStandAlone_test.cpp @@ -53,6 +53,7 @@ namespace utf = boost::unit_test; #include "observables/ParticleVelocities.hpp" #include "observables/PidObservable.hpp" #include "p3m/FFTBackendLegacy.hpp" +#include "p3m/FFTBuffersLegacy.hpp" #include "particle_node.hpp" #include "system/System.hpp" @@ -234,8 +235,9 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 5, 0.615, 1e-3}; - auto solver = new_p3m_handle( - std::move(p3m), prefactor, 1, false, true); + auto solver = + new_p3m_handle( + std::move(p3m), prefactor, 1, false, true); add_actor(comm, espresso::system, system.coulomb.impl->solver, solver, [&system]() { system.on_coulomb_change(); }); BOOST_CHECK(not solver->is_gpu()); @@ -302,8 +304,9 @@ BOOST_FIXTURE_TEST_CASE(espresso_system_stand_alone, ParticleFactory) { 5, 0.615, 1e-3}; - auto solver = new_dp3m_handle( - std::move(p3m), prefactor, 1, false); + auto solver = + new_dp3m_handle( + std::move(p3m), prefactor, 1, false); add_actor(comm, espresso::system, system.dipoles.impl->solver, solver, [&system]() { system.on_dipoles_change(); }); BOOST_CHECK(not solver->is_gpu()); diff --git a/src/core/unit_tests/fft_test.cpp b/src/core/unit_tests/fft_test.cpp index bf8100f887..9d5a3bf98e 100644 --- a/src/core/unit_tests/fft_test.cpp +++ b/src/core/unit_tests/fft_test.cpp @@ -20,15 +20,14 @@ #define BOOST_TEST_MODULE "FFT utility functions" #define BOOST_TEST_DYN_LINK -#include "config/config.hpp" - -#if defined(P3M) or defined(DP3M) - #include +#include "config/config.hpp" + #include "fft/fft.hpp" #include "fft/vector.hpp" #include "p3m/for_each_3d.hpp" +#include "p3m/packing.hpp" #include @@ -41,6 +40,8 @@ #include #include +#if defined(P3M) or defined(DP3M) + BOOST_AUTO_TEST_CASE(fft_find_comm_groups_mismatch) { using fft::find_comm_groups; int my_pos[3] = {0}; @@ -170,6 +171,8 @@ BOOST_AUTO_TEST_CASE(fft_plan_without_mpi) { delete plan; } +#endif // defined(P3M) or defined(DP3M) + BOOST_AUTO_TEST_CASE(for_each_3d_test) { auto const m_start = Utils::Vector3i{{0, -1, 3}}; auto const m_stop = Utils::Vector3i{{2, 2, 5}}; @@ -207,6 +210,90 @@ BOOST_AUTO_TEST_CASE(for_each_3d_test) { } } -#else // defined(P3M) or defined(DP3M) -int main(int argc, char **argv) {} -#endif // defined(P3M) or defined(DP3M) +template +void run_on_grid(T (&grid)[dim1][dim2][dim3], F fun) { + for (auto i = 0; i < dim1; ++i) { + for (auto j = 0; j < dim2; ++j) { + for (auto k = 0; k < dim3; ++k) { + fun(grid, i, j, k); + } + } + } +} + +BOOST_AUTO_TEST_CASE(packing_unpacking) { + // packing into block + { + int const dim[3] = {4, 6, 8}; + int mesh[4][6][8] = {}; + run_on_grid(mesh, [](auto &mesh, int i, int j, int k) { + mesh[i][j][k] = (i + 1) * (j + 1) * (k + 1); + }); + int const start[3] = {1, 2, 3}; + int const size[3] = {2, 3, 4}; + int block[2][3][4] = {}; + run_on_grid(block, [](auto &gd, int i, int j, int k) { gd[i][j][k] = -1; }); + fft_pack_block(&(mesh[0][0][0]), &(block[0][0][0]), start, size, dim, 1); + run_on_grid(block, [&start](auto &block, int i, int j, int k) { + auto const ref = + (i + 1 + start[0u]) * (j + 1 + start[1u]) * (k + 1 + start[2u]); + BOOST_REQUIRE_EQUAL(block[i][j][k], ref); + }); + } + + // packing into self + { + int const dim[3] = {1, 1, 8}; + int mesh[1][1][8] = {}; + run_on_grid(mesh, [](auto &gd, int i, int j, int k) { gd[i][j][k] = k; }); + int const start[3] = {0, 0, 0}; + int const size[3] = {1, 1, 4}; + fft_pack_block(&(mesh[0][0][0]), &(mesh[0][0][3]), start, size, dim, 1); + run_on_grid(mesh, [&size](auto &mesh, int i, int j, int k) { + auto ref = k; + if (k >= 3 and k < 3 + size[2u]) { + ref = k - 3; + } + BOOST_REQUIRE_EQUAL(mesh[i][j][k], ref); + }); + } + + // unpacking into grid + { + int const dim[3] = {4, 6, 8}; + int mesh[4][6][8] = {}; + run_on_grid(mesh, [](auto &mesh, int i, int j, int k) { + mesh[i][j][k] = (i + 1) * (j + 1) * (k + 1); + }); + int const start[3] = {1, 2, 3}; + int const size[3] = {2, 3, 4}; + int block[2][3][4] = {}; + run_on_grid(block, [](auto &block, int i, int j, int k) { + block[i][j][k] = -(i + 1) * (j + 1) * (k + 1); + }); + fft_unpack_block(&(block[0][0][0]), &(mesh[0][0][0]), start, size, dim, 1); + run_on_grid(mesh, [&start, &size](auto &mesh, int i, int j, int k) { + auto ref = (i + 1) * (j + 1) * (k + 1); + if ((i >= start[0] and i < (start[0] + size[0])) and + (j >= start[1] and j < (start[1] + size[1])) and + (k >= start[2] and k < (start[2] + size[2]))) { + ref = -(i + 1 - start[0]) * (j + 1 - start[1]) * (k + 1 - start[2]); + } + BOOST_REQUIRE_EQUAL(mesh[i][j][k], ref); + }); + } + + // unpacking into self + { + int const dim[3] = {1, 1, 8}; + int mesh[1][1][8] = {}; + run_on_grid(mesh, [](auto &gd, int i, int j, int k) { gd[i][j][k] = k; }); + int const start[3] = {0, 0, 0}; + int const size[3] = {1, 1, 4}; + fft_unpack_block(&(mesh[0][0][3]), &(mesh[0][0][0]), start, size, dim, 1); + run_on_grid(mesh, [&size](auto &mesh, int i, int j, int k) { + auto const ref = (k < size[2u]) ? k + 3 : k; + BOOST_REQUIRE_EQUAL(mesh[i][j][k], ref); + }); + } +} diff --git a/src/script_interface/electrostatics/CoulombP3M.hpp b/src/script_interface/electrostatics/CoulombP3M.hpp index 78d1e4d8e2..5e16440de9 100644 --- a/src/script_interface/electrostatics/CoulombP3M.hpp +++ b/src/script_interface/electrostatics/CoulombP3M.hpp @@ -28,18 +28,28 @@ #include "core/electrostatics/p3m.hpp" #include "core/electrostatics/p3m.impl.hpp" #include "core/p3m/FFTBackendLegacy.hpp" +#include "core/p3m/FFTBuffersLegacy.hpp" #include "script_interface/get_value.hpp" #include +#include #include +#include + +#ifdef FFTW3_H +#error "The FFTW3 library shouldn't be visible in this translation unit" +#endif namespace ScriptInterface { namespace Coulomb { template class CoulombP3M : public Actor, ::CoulombP3M> { + int m_tune_timings; bool m_tune; + bool m_tune_verbose; + bool m_check_complex_residuals; bool m_single_precision; public: @@ -56,44 +66,48 @@ class CoulombP3M : public Actor, ::CoulombP3M> { CoulombP3M() { add_parameters({ {"single_precision", AutoParameter::read_only, - [this]() { return m_single_precision; }}, + [this]() { return not actor()->is_double_precision(); }}, {"alpha_L", AutoParameter::read_only, - [this]() { return actor()->p3m.params.alpha_L; }}, + [this]() { return actor()->p3m_params.alpha_L; }}, {"r_cut_iL", AutoParameter::read_only, - [this]() { return actor()->p3m.params.r_cut_iL; }}, + [this]() { return actor()->p3m_params.r_cut_iL; }}, {"mesh", AutoParameter::read_only, - [this]() { return actor()->p3m.params.mesh; }}, + [this]() { return actor()->p3m_params.mesh; }}, {"mesh_off", AutoParameter::read_only, - [this]() { return actor()->p3m.params.mesh_off; }}, + [this]() { return actor()->p3m_params.mesh_off; }}, {"cao", AutoParameter::read_only, - [this]() { return actor()->p3m.params.cao; }}, + [this]() { return actor()->p3m_params.cao; }}, {"accuracy", AutoParameter::read_only, - [this]() { return actor()->p3m.params.accuracy; }}, + [this]() { return actor()->p3m_params.accuracy; }}, {"epsilon", AutoParameter::read_only, - [this]() { return actor()->p3m.params.epsilon; }}, + [this]() { return actor()->p3m_params.epsilon; }}, {"a", AutoParameter::read_only, - [this]() { return actor()->p3m.params.a; }}, + [this]() { return actor()->p3m_params.a; }}, {"alpha", AutoParameter::read_only, - [this]() { return actor()->p3m.params.alpha; }}, + [this]() { return actor()->p3m_params.alpha; }}, {"r_cut", AutoParameter::read_only, - [this]() { return actor()->p3m.params.r_cut; }}, + [this]() { return actor()->p3m_params.r_cut; }}, {"is_tuned", AutoParameter::read_only, [this]() { return actor()->is_tuned(); }}, {"verbose", AutoParameter::read_only, - [this]() { return actor()->tune_verbose; }}, + [this]() { return m_tune_verbose; }}, {"timings", AutoParameter::read_only, - [this]() { return actor()->tune_timings; }}, + [this]() { return m_tune_timings; }}, {"tune", AutoParameter::read_only, [this]() { return m_tune; }}, {"check_complex_residuals", AutoParameter::read_only, - [this]() { return actor()->check_complex_residuals; }}, + [this]() { return m_check_complex_residuals; }}, }); } void do_construct(VariantMap const ¶ms) override { m_tune = get_value(params, "tune"); - m_single_precision = get_value(params, "single_precision"); + m_tune_timings = get_value(params, "timings"); + m_tune_verbose = get_value(params, "verbose"); + m_check_complex_residuals = + get_value(params, "check_complex_residuals"); + auto const single_precision = get_value(params, "single_precision"); context()->parallel_try_catch([&]() { - if (Architecture == Arch::GPU and not m_single_precision) { + if (Architecture == Arch::GPU and not single_precision) { throw std::invalid_argument( "P3M GPU only implemented in single-precision mode"); } @@ -105,24 +119,25 @@ class CoulombP3M : public Actor, ::CoulombP3M> { get_value(params, "cao"), get_value(params, "alpha"), get_value(params, "accuracy")}; - make_handle(m_single_precision, std::move(p3m), - get_value(params, "prefactor"), - get_value(params, "timings"), - get_value(params, "verbose"), - get_value(params, "check_complex_residuals")); + make_handle(single_precision, std::move(p3m), + get_value(params, "prefactor"), m_tune_timings, + m_tune_verbose, m_check_complex_residuals); }); set_charge_neutrality_tolerance(params); } private: + template + void make_handle_impl(Args &&...args) { + m_actor = new_p3m_handle(std::forward(args)...); + } template void make_handle(bool single_precision, Args &&...args) { if (single_precision) { - m_actor = new_p3m_handle( - std::forward(args)...); + make_handle_impl(std::forward(args)...); } else { - m_actor = new_p3m_handle( - std::forward(args)...); + make_handle_impl(std::forward(args)...); } } }; diff --git a/src/script_interface/magnetostatics/DipolarP3M.hpp b/src/script_interface/magnetostatics/DipolarP3M.hpp index 20dff5968e..67a2c2205d 100644 --- a/src/script_interface/magnetostatics/DipolarP3M.hpp +++ b/src/script_interface/magnetostatics/DipolarP3M.hpp @@ -28,18 +28,27 @@ #include "core/magnetostatics/dp3m.hpp" #include "core/magnetostatics/dp3m.impl.hpp" #include "core/p3m/FFTBackendLegacy.hpp" +#include "core/p3m/FFTBuffersLegacy.hpp" #include "script_interface/get_value.hpp" #include +#include +#include +#include + +#ifdef FFTW3_H +#error "The FFTW3 library shouldn't be visible in this translation unit" +#endif namespace ScriptInterface { namespace Dipoles { template class DipolarP3M : public Actor, ::DipolarP3M> { + int m_tune_timings; bool m_tune; - bool m_single_precision; + bool m_tune_verbose; public: using Base = Actor, ::DipolarP3M>; @@ -54,40 +63,42 @@ class DipolarP3M : public Actor, ::DipolarP3M> { DipolarP3M() { add_parameters({ {"single_precision", AutoParameter::read_only, - [this]() { return m_single_precision; }}, + [this]() { return not actor()->is_double_precision(); }}, {"alpha_L", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.alpha_L; }}, + [this]() { return actor()->dp3m_params.alpha_L; }}, {"r_cut_iL", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.r_cut_iL; }}, + [this]() { return actor()->dp3m_params.r_cut_iL; }}, {"mesh", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.mesh; }}, + [this]() { return actor()->dp3m_params.mesh; }}, {"mesh_off", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.mesh_off; }}, + [this]() { return actor()->dp3m_params.mesh_off; }}, {"cao", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.cao; }}, + [this]() { return actor()->dp3m_params.cao; }}, {"accuracy", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.accuracy; }}, + [this]() { return actor()->dp3m_params.accuracy; }}, {"epsilon", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.epsilon; }}, + [this]() { return actor()->dp3m_params.epsilon; }}, {"a", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.a; }}, + [this]() { return actor()->dp3m_params.a; }}, {"alpha", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.alpha; }}, + [this]() { return actor()->dp3m_params.alpha; }}, {"r_cut", AutoParameter::read_only, - [this]() { return actor()->dp3m.params.r_cut; }}, + [this]() { return actor()->dp3m_params.r_cut; }}, {"is_tuned", AutoParameter::read_only, [this]() { return actor()->is_tuned(); }}, {"verbose", AutoParameter::read_only, - [this]() { return actor()->tune_verbose; }}, + [this]() { return m_tune_verbose; }}, {"timings", AutoParameter::read_only, - [this]() { return actor()->tune_timings; }}, + [this]() { return m_tune_timings; }}, {"tune", AutoParameter::read_only, [this]() { return m_tune; }}, }); } void do_construct(VariantMap const ¶ms) override { m_tune = get_value(params, "tune"); - m_single_precision = get_value(params, "single_precision"); + m_tune_timings = get_value(params, "timings"); + m_tune_verbose = get_value(params, "verbose"); + auto const single_precision = get_value(params, "single_precision"); static_assert(Architecture == Arch::CPU, "GPU not implemented"); context()->parallel_try_catch([&]() { auto p3m = P3MParameters{!get_value_or(params, "is_tuned", !m_tune), @@ -98,22 +109,24 @@ class DipolarP3M : public Actor, ::DipolarP3M> { get_value(params, "cao"), get_value(params, "alpha"), get_value(params, "accuracy")}; - make_handle(m_single_precision, std::move(p3m), - get_value(params, "prefactor"), - get_value(params, "timings"), - get_value(params, "verbose")); + make_handle(single_precision, std::move(p3m), + get_value(params, "prefactor"), m_tune_timings, + m_tune_verbose); }); } private: + template + void make_handle_impl(Args &&...args) { + m_actor = new_dp3m_handle(std::forward(args)...); + } template void make_handle(bool single_precision, Args &&...args) { if (single_precision) { - m_actor = new_dp3m_handle( - std::forward(args)...); + make_handle_impl(std::forward(args)...); } else { - m_actor = new_dp3m_handle( - std::forward(args)...); + make_handle_impl(std::forward(args)...); } } };