diff --git a/src/Evolution/DgSubcell/Matrices.cpp b/src/Evolution/DgSubcell/Matrices.cpp index e9c41879bf46..432db30c01f1 100644 --- a/src/Evolution/DgSubcell/Matrices.cpp +++ b/src/Evolution/DgSubcell/Matrices.cpp @@ -37,118 +37,33 @@ const Matrix& projection_matrix( subcell_quadrature == Spectral::Quadrature::CellCentered, "subcell_quadrature option in projection_matrix should be " "FaceCentered or CellCentered"); - switch (dg_mesh.quadrature(0)) { - case Spectral::Quadrature::GaussLobatto: { - switch (subcell_quadrature) { - case Spectral::Quadrature::CellCentered: { - static const auto cache_gl = make_static_cache< - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::Legendre> + - 1>, - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::FiniteDifference> + - 1>>([](const size_t local_num_dg_points, - const size_t local_num_fd_points) { - return Spectral::interpolation_matrix< - Spectral::Basis::Legendre, Spectral::Quadrature::GaussLobatto>( - local_num_dg_points, - Spectral::collocation_points< - Spectral::Basis::FiniteDifference, - Spectral::Quadrature::CellCentered>(local_num_fd_points)); - }); - return cache_gl(dg_mesh.extents(0), subcell_extents); - } - case Spectral::Quadrature::FaceCentered: { - static const auto cache_gl = make_static_cache< - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::Legendre> + - 1>, - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::FiniteDifference> + - 1>>([](const size_t local_num_dg_points, - const size_t local_num_fd_points) { - return Spectral::interpolation_matrix< - Spectral::Basis::Legendre, Spectral::Quadrature::GaussLobatto>( - local_num_dg_points, - Spectral::collocation_points< - Spectral::Basis::FiniteDifference, - Spectral::Quadrature::FaceCentered>(local_num_fd_points)); - }); - return cache_gl(dg_mesh.extents(0), subcell_extents); - } - default: - ERROR("Unsupported quadrature type in FD subcell projection matrix"); - } - } - case Spectral::Quadrature::Gauss: { - switch (subcell_quadrature) { - case Spectral::Quadrature::CellCentered: { - static const auto cache_g = make_static_cache< - CacheRange< - Spectral::minimum_number_of_points< - Spectral::Basis::Legendre, Spectral::Quadrature::Gauss>, - Spectral::maximum_number_of_points< - Spectral::Basis::Legendre> + - 1>, - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::FiniteDifference> + - 1>>([](const size_t local_num_dg_points, - const size_t local_num_fd_points) { - return Spectral::interpolation_matrix( - local_num_dg_points, - Spectral::collocation_points< - Spectral::Basis::FiniteDifference, - Spectral::Quadrature::CellCentered>(local_num_fd_points)); - }); - return cache_g(dg_mesh.extents(0), subcell_extents); - } - case Spectral::Quadrature::FaceCentered: { - static const auto cache_g = make_static_cache< - CacheRange< - Spectral::minimum_number_of_points< - Spectral::Basis::Legendre, Spectral::Quadrature::Gauss>, - Spectral::maximum_number_of_points< - Spectral::Basis::Legendre> + - 1>, - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::FiniteDifference> + - 1>>([](const size_t local_num_dg_points, - const size_t local_num_fd_points) { - return Spectral::interpolation_matrix( - local_num_dg_points, - Spectral::collocation_points< - Spectral::Basis::FiniteDifference, - Spectral::Quadrature::FaceCentered>(local_num_fd_points)); - }); - return cache_g(dg_mesh.extents(0), subcell_extents); - } - default: - ERROR("Unsupported quadrature type in FD subcell projection matrix"); - } - } - default: - ERROR("Unsupported quadrature type in FD subcell projection matrix"); - }; + static const auto cache = make_static_cache< + CacheRange< + Spectral::minimum_number_of_points< + Spectral::Basis::Legendre, Spectral::Quadrature::GaussLobatto>, + Spectral::maximum_number_of_points + 1>, + CacheRange, + Spectral::maximum_number_of_points< + Spectral::Basis::FiniteDifference> + + 1>, + CacheEnumeration, + CacheEnumeration>( + [](const size_t local_num_dg_points, const size_t local_num_fd_points, + const Spectral::Quadrature dg_quadrature, + const Spectral::Quadrature local_subcell_quadrature) { + return Spectral::interpolation_matrix( + Mesh<1>{local_num_dg_points, Spectral::Basis::Legendre, + dg_quadrature}, + Spectral::collocation_points( + Mesh<1>{local_num_fd_points, Spectral::Basis::FiniteDifference, + local_subcell_quadrature})); + }); + return cache(dg_mesh.extents(0), subcell_extents, dg_mesh.quadrature(0), + subcell_quadrature); } namespace { @@ -437,9 +352,8 @@ const Matrix& projection_matrix(const Mesh<1>& dg_mesh, ASSERT(ghost_zone_size <= max_ghost_zone_size and ghost_zone_size >= 2, "ghost_zone_size must be in [2, " << max_ghost_zone_size << " ] but got " << ghost_zone_size); - switch (dg_mesh.quadrature(0)) { - case Spectral::Quadrature::GaussLobatto: { - static const auto cache_gl = make_static_cache< + static const auto cache = + make_static_cache< CacheRange< Spectral::minimum_number_of_points< Spectral::Basis::Legendre, @@ -453,9 +367,12 @@ const Matrix& projection_matrix(const Mesh<1>& dg_mesh, Spectral::Basis::FiniteDifference> + 1>, CacheRange<2_st, max_ghost_zone_size + 1>, + CacheEnumeration, CacheEnumeration>( [](const size_t local_num_dg_points, const size_t local_num_fd_points, - const size_t local_ghost_zone_size, const Side local_side) { + const size_t local_ghost_zone_size, + const Spectral::Quadrature dg_quadrature, const Side local_side) { const DataVector& fd_points = Spectral::collocation_points< Spectral::Basis::FiniteDifference, Spectral::Quadrature::CellCentered>(local_num_fd_points); @@ -466,50 +383,13 @@ const Matrix& projection_matrix(const Mesh<1>& dg_mesh, : (local_num_fd_points - local_ghost_zone_size + i)]; } - return Spectral::interpolation_matrix< - Spectral::Basis::Legendre, Spectral::Quadrature::GaussLobatto>( - local_num_dg_points, target_points); + return Spectral::interpolation_matrix( + Mesh<1>{local_num_dg_points, Spectral::Basis::Legendre, + dg_quadrature}, + target_points); }); - return cache_gl(dg_mesh.extents(0), subcell_extents, ghost_zone_size, - side); - } - case Spectral::Quadrature::Gauss: { - static const auto cache_g = make_static_cache< - CacheRange< - Spectral::minimum_number_of_points, - Spectral::maximum_number_of_points + - 1>, - CacheRange, - Spectral::maximum_number_of_points< - Spectral::Basis::FiniteDifference> + - 1>, - CacheRange<2_st, max_ghost_zone_size + 1>, - CacheEnumeration>( - [](const size_t local_num_dg_points, const size_t local_num_fd_points, - const size_t local_ghost_zone_size, const Side local_side) { - const DataVector& fd_points = Spectral::collocation_points< - Spectral::Basis::FiniteDifference, - Spectral::Quadrature::CellCentered>(local_num_fd_points); - DataVector target_points(local_ghost_zone_size); - for (size_t i = 0; i < local_ghost_zone_size; ++i) { - target_points[i] = fd_points[local_side == Side::Lower - ? i - : (local_num_fd_points - - local_ghost_zone_size + i)]; - } - return Spectral::interpolation_matrix( - local_num_dg_points, target_points); - }); - return cache_g(dg_mesh.extents(0), subcell_extents, ghost_zone_size, - side); - } - default: - ERROR("Unsupported quadrature type in FD subcell projection matrix"); - }; + return cache(dg_mesh.extents(0), subcell_extents, ghost_zone_size, + dg_mesh.quadrature(0), side); } #define GET_DIM(data) BOOST_PP_TUPLE_ELEM(0, data) diff --git a/src/Utilities/StaticCache.hpp b/src/Utilities/StaticCache.hpp index a44251a413a2..6ca7bc013357 100644 --- a/src/Utilities/StaticCache.hpp +++ b/src/Utilities/StaticCache.hpp @@ -10,6 +10,7 @@ #include "Utilities/ErrorHandling/Assert.hpp" #include "Utilities/Gsl.hpp" #include "Utilities/Requires.hpp" +#include "Utilities/TMPL.hpp" #include "Utilities/TypeTraits/IsInteger.hpp" /// \ingroup UtilitiesGroup @@ -34,6 +35,8 @@ template struct CacheEnumeration { constexpr static size_t size = sizeof...(Enums); using value_type = EnumerationType; + static constexpr std::array values{Enums...}; + using value_list = tmpl::integral_list; }; /// \ingroup UtilitiesGroup @@ -79,115 +82,358 @@ class StaticCache { const T& operator()(const Args... parameters) const { static_assert(sizeof...(parameters) == sizeof...(Ranges), "Number of arguments must match number of ranges."); - return unwrap_cache(generate_tuple(parameters)...); + return unwrap_cache_combined(generate_tuple(parameters)...); } private: - template ::value> = nullptr> + template auto generate_tuple(const T1 parameter) const { - static_assert( - tt::is_integer_v>, - "The parameter passed for a CacheRange must be an integer type."); - return std::make_tuple( - static_cast(parameter), - std::integral_constant{}, - std::make_integer_sequence{}); - } + if constexpr (std::is_enum::value) { + static_assert( + std::is_same>::value, + "Mismatched enum parameter type and cached type."); + size_t array_location = std::numeric_limits::max(); + static const std::array values{ + Range::values}; + for (size_t i = 0; i < Range::size; ++i) { + if (parameter == gsl::at(values, i)) { + array_location = i; + break; + } + } + if (UNLIKELY(array_location == std::numeric_limits::max())) { + ERROR("Uncached enumeration value: " << parameter); + } + return std::tuple{array_location, typename Range::value_list{}}; + } else { + static_assert( + tt::is_integer_v>, + "The parameter passed for a CacheRange must be an integer type."); - template ::value> = nullptr> - std::tuple, Range> generate_tuple( - const T1 parameter) const { - static_assert( - std::is_same>::value, - "Mismatched enum parameter type and cached type."); - return {parameter, Range{}}; + // Check range here because the nested range checks in the unwrap_cache + // function cause significant compile time overhead. + if (UNLIKELY(Range::start > + static_cast(parameter) or + static_cast(parameter) >= + Range::start + + static_cast(Range::size))) { + ERROR("Index out of range: " + << Range::start << " <= " << parameter << " < " + << Range::start + + static_cast(Range::size)); + } + return std::tuple{ + // unsigned cast is safe since this is an index into an array + static_cast( + static_cast(parameter) - + Range::start), + tmpl::make_sequence< + tmpl::integral_constant, + Range::size>{}}; + } } + // Compilation time notes: + // + // - The separate peeling of different number of arguments is the + // fastest implementation Nils Deppe has found so far. + // - The second fastest is using a Cartesian product on the lists of + // possible values, followed by a for_each over that list to set the + // function pointers in the array. Note that having the for_each function + // be marked `constexpr` resulted in a 6x reduction in compilation time + // for clang 17 compared to the not constexpr version, but still 40% + // slower compilation compared to the pattern matching below. template - const T& unwrap_cache() const { + const T& unwrap_cache_combined() const { static const T cached_object = generator_(IntegralConstantValues::value...); return cached_object; } - template - const T& unwrap_cache( - std::tuple< - std::remove_cv_t, - std::integral_constant, - IndexOffset>, - std::integer_sequence, Is...>> - parameter0, - Args... parameters) const { - if (UNLIKELY(IndexOffset > std::get<0>(parameter0) or - std::get<0>(parameter0) >= - IndexOffset + - static_cast(sizeof...(Is)))) { - ERROR("Index out of range: " - << IndexOffset << " <= " << std::get<0>(parameter0) << " < " - << IndexOffset + static_cast(sizeof...(Is))); - } +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ > 10 && __GNUC__ < 14 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + template + const T& unwrap_cache_combined( + std::tuple> parameter0) const { + // note that the act of assigning to the specified function pointer type + // fixes the template arguments that need to be inferred. + static const std::array::*)() + const, + sizeof...(IntegralConstants)> + cache{{&StaticCache::unwrap_cache_combined< + IntegralConstantValues..., IntegralConstants>...}}; + // The array `cache` holds pointers to member functions, so we dereference + // the pointer and invoke it on `this`. + return (this->*gsl::at(cache, std::get<0>(parameter0)))(); + } + + template + const T& unwrap_cache_combined( + std::tuple> parameter0, + std::tuple> parameter1) const { // note that the act of assigning to the specified function pointer type // fixes the template arguments that need to be inferred. static const std::array< - const T& (StaticCache::*)(Args...) const, - sizeof...(Is)> - cache{{&StaticCache::unwrap_cache< - IntegralConstantValues..., - std::integral_constant>...}}; + const T& (StaticCache::*)() const, + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1)> + cache = []() { + std::array::*)() const, + sizeof...(IntegralConstants0) * + sizeof...(IntegralConstants1)> + result; + size_t counter1 = 0; + const auto helper = [&counter1, + &result]() { + size_t counter0 = 0; + // Note: Left-to-right ordering is guaranteed by the comma + // operator, otherwise we'd need to use another + // EXPAND_PACK_LEFT_TO_RIGHT + (((result[counter0 + sizeof...(IntegralConstants0) * counter1] = + &StaticCache::unwrap_cache_combined< + IntegralConstantValues..., IntegralConstants0, + IntegralConstant1>), + ++counter0), + ...); + ++counter1; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper.template operator()()); + return result; + }(); + // The array `cache` holds pointers to member functions, so we dereference // the pointer and invoke it on `this`. -#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ > 10 && __GNUC__ < 14 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - return (this->*gsl::at(cache, std::get<0>(parameter0) - IndexOffset))( - parameters...); -#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ > 10 && __GNUC__ < 14 -#pragma GCC diagnostic pop -#endif + return (this->*gsl::at(cache, std::get<0>(parameter0) + + sizeof...(IntegralConstants0) * + std::get<0>(parameter1)))(); } - template - const T& unwrap_cache( - std::tuple> - parameter0, - Args... parameters) const { - size_t array_location = std::numeric_limits::max(); - static const std::array values{ - {EnumValues...}}; - for (size_t i = 0; i < sizeof...(EnumValues); ++i) { - if (std::get<0>(parameter0) == gsl::at(values, i)) { - array_location = i; - break; - } - } - if (UNLIKELY(array_location == std::numeric_limits::max())) { - ERROR("Uncached enumeration value: " << std::get<0>(parameter0)); - } + template + const T& unwrap_cache_combined( + std::tuple> parameter0, + std::tuple> parameter1, + std::tuple> parameter2) const { + // note that the act of assigning to the specified function pointer type + // fixes the template arguments that need to be inferred. + static const std::array< + const T& (StaticCache::*)() const, + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2)> + cache = []() { + std::array::*)() const, + sizeof...(IntegralConstants0) * + sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2)> + result; + size_t counter2 = 0; + const auto helper2 = [&counter2, + &result]() { + size_t counter1 = 0; + const auto helper = [&counter1, &counter2, + &result]() { + size_t counter0 = 0; + // Note: Left-to-right ordering is guaranteed by the comma + // operator, otherwise we'd need to use another + // EXPAND_PACK_LEFT_TO_RIGHT + (((result[counter0 + + sizeof...(IntegralConstants0) * + (counter1 + + sizeof...(IntegralConstants1) * counter2)] = + &StaticCache:: + unwrap_cache_combined< + IntegralConstantValues..., IntegralConstants0, + IntegralConstant1, IntegralConstant2>), + ++counter0), + ...); + ++counter1; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper.template operator()()); + ++counter2; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper2.template operator()()); + return result; + }(); + + // The array `cache` holds pointers to member functions, so we dereference + // the pointer and invoke it on `this`. + return (this->*gsl::at(cache, std::get<0>(parameter0) + + sizeof...(IntegralConstants0) * + (std::get<0>(parameter1) + + sizeof...(IntegralConstants1) * + std::get<0>(parameter2))))(); + } + + template + const T& unwrap_cache_combined( + std::tuple> parameter0, + std::tuple> parameter1, + std::tuple> parameter2, + std::tuple> parameter3) const { + // note that the act of assigning to the specified function pointer type + // fixes the template arguments that need to be inferred. + static const std::array< + const T& (StaticCache::*)() const, + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2) * sizeof...(IntegralConstants3)> + cache = []() { + std::array< + const T& (StaticCache::*)() const, + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2) * sizeof...(IntegralConstants3)> + result; + size_t counter3 = 0; + const auto helper3 = [&counter3, + &result]() { + size_t counter2 = 0; + const auto helper2 = [&counter2, &counter3, + &result]() { + size_t counter1 = 0; + const auto helper = [&counter1, &counter2, &counter3, + &result]() { + size_t counter0 = 0; + // Note: Left-to-right ordering is guaranteed by the comma + // operator, otherwise we'd need to use another + // EXPAND_PACK_LEFT_TO_RIGHT + (((result[counter0 + + sizeof...(IntegralConstants0) * + (counter1 + + sizeof...(IntegralConstants1) * + (counter2 + sizeof...(IntegralConstants2) * + counter3))] = + &StaticCache:: + unwrap_cache_combined< + IntegralConstantValues..., IntegralConstants0, + IntegralConstant1, IntegralConstant2, + IntegralConstant3>), + ++counter0), + ...); + ++counter1; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper.template operator()()); + ++counter2; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper2.template operator()()); + ++counter3; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper3.template operator()()); + return result; + }(); + + // The array `cache` holds pointers to member functions, so we dereference + // the pointer and invoke it on `this`. + return ( + this->*gsl::at(cache, std::get<0>(parameter0) + + sizeof...(IntegralConstants0) * + (std::get<0>(parameter1) + + sizeof...(IntegralConstants1) * + (std::get<0>(parameter2) + + sizeof...(IntegralConstants2) * + std::get<0>(parameter3)))))(); + } + + template + const T& unwrap_cache_combined( + std::tuple> parameter0, + std::tuple> parameter1, + std::tuple> parameter2, + std::tuple> parameter3, + std::tuple> parameter4, + const Args&... parameters) const { // note that the act of assigning to the specified function pointer type // fixes the template arguments that need to be inferred. static const std::array< const T& (StaticCache::*)(Args...) const, - sizeof...(EnumValues)> - cache{{&StaticCache::unwrap_cache< - IntegralConstantValues..., - std::integral_constant>...}}; + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2) * sizeof...(IntegralConstants3) * + sizeof...(IntegralConstants3)> + cache = []() { + std::array< + const T& (StaticCache::*)(Args...) const, + sizeof...(IntegralConstants0) * sizeof...(IntegralConstants1) * + sizeof...(IntegralConstants2) * + sizeof...(IntegralConstants3) * sizeof...(IntegralConstants3)> + result; + size_t counter4 = 0; + const auto helper4 = [&counter4, + &result]() { + size_t counter3 = 0; + const auto helper3 = [&counter3, &counter4, + &result]() { + size_t counter2 = 0; + const auto helper2 = [&counter2, &counter3, &counter4, + &result]() { + size_t counter1 = 0; + const auto helper = [&counter1, &counter2, &counter3, &counter4, + &result]() { + size_t counter0 = 0; + // Note: Left-to-right ordering is guaranteed by the comma + // operator, otherwise we'd need to use another + // EXPAND_PACK_LEFT_TO_RIGHT + (((result[counter0 + + sizeof...(IntegralConstants0) * + (counter1 + + sizeof...(IntegralConstants1) * + (counter2 + + sizeof...(IntegralConstants2) * + (counter3 + + sizeof...(IntegralConstants3) * + counter4)))] = + &StaticCache:: + unwrap_cache_combined< + IntegralConstantValues..., IntegralConstants0, + IntegralConstant1, IntegralConstant2, + IntegralConstant3, IntegralConstant4>), + ++counter0), + ...); + ++counter1; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper.template operator()()); + ++counter2; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper2.template operator()()); + ++counter3; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper3.template operator()()); + ++counter4; + }; + EXPAND_PACK_LEFT_TO_RIGHT( + helper4.template operator()()); + return result; + }(); + // The array `cache` holds pointers to member functions, so we dereference // the pointer and invoke it on `this`. -#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ > 10 && __GNUC__ < 14 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Warray-bounds" -#endif - return (this->*gsl::at(cache, array_location))(parameters...); + return (this->*gsl::at(cache, + std::get<0>(parameter0) + + sizeof...(IntegralConstants0) * + (std::get<0>(parameter1) + + sizeof...(IntegralConstants1) * + (std::get<0>(parameter2) + + sizeof...(IntegralConstants2) * + (std::get<0>(parameter3) + + sizeof...(IntegralConstants3) * + std::get<0>(parameter4))))))( + parameters...); + } #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ > 10 && __GNUC__ < 14 #pragma GCC diagnostic pop #endif - } const Generator generator_; };