diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 32981b6c..4b9b45cd 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -16,3 +16,4 @@ add_subdirectory(dot_product) add_subdirectory(tiled_layout) add_subdirectory(restrict_accessor) add_subdirectory(aligned_accessor) +add_subdirectory(for_each_extents) diff --git a/examples/for_each_extents/CMakeLists.txt b/examples/for_each_extents/CMakeLists.txt new file mode 100644 index 00000000..719e1ebd --- /dev/null +++ b/examples/for_each_extents/CMakeLists.txt @@ -0,0 +1,2 @@ +mdspan_add_example(for_each_extents) +mdspan_add_example(for_each_extents_no_ranges) diff --git a/examples/for_each_extents/for_each_extents.cpp b/examples/for_each_extents/for_each_extents.cpp new file mode 100755 index 00000000..b366dea4 --- /dev/null +++ b/examples/for_each_extents/for_each_extents.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include + +// "gcc trunk" on godbolt.org as of 2023/03/21 +// (> 12.2) does not define __cpp_lib_ranges_iota, +// yet std::views::iota works just fine. +// +// "icpx C++23" github.com/kokkos/mdspan test build +// has a broken header as of 2024/04/02. +#if defined(__cpp_lib_ranges_cartesian_product) && (! defined(__INTEL_LLVM_COMPILER)) // && defined(__cpp_lib_ranges_iota) +# define MDSPAN_EXAMPLE_CAN_USE_STD_RANGES 1 +#endif + +#if defined(MDSPAN_EXAMPLE_CAN_USE_STD_RANGES) + +// GCC >= 13 ("gcc trunk" on godbolt.org as of 2023/03/21) +// implements std::views::cartesian_product. +// If you don't have it, you can use range-v3 instead. +// Note that mixing std::views::iota with +// ranges::views::cartesian_product doesn't work. +// The range-v3 work-around looks like this. +// +// #include +// #include +// namespace ranges_views = ranges::views; + +#include +namespace ranges_views = std::views; + +auto print_args = [] (Args&&... args) { + ((std::cout << std::forward(args) << '\n'), ...); +}; + +template +auto reverse(std::index_sequence) -> + std::index_sequence; + +template +using reverse_index_sequence_t = + decltype(reverse(std::make_index_sequence())); + +template +void for_each_in_extents_impl(Callable&& f, + Kokkos::extents e, + std::index_sequence rank_sequence) +{ + // In the layout_left case, caller passes in N-1, N-2, ..., 1, 0. + // This reverses the order of the Cartesian product, + // but also reverses the order of indices in each tuple. + [&] (std::index_sequence) { + auto v = std::views::cartesian_product( + std::views::iota(IndexType(0), e.extent(Indices))...); + for (const auto& tuple_of_indices : v) { + // In the layout_left case, we undo the reversal of each tuple + // by getting its elements in reverse order. + [&] (std::index_sequence) { + std::forward(f)(std::get(tuple_of_indices)...); + } (rank_sequence); + } + } (rank_sequence); +} + +template +void for_each_in_extents(Callable&& f, + Kokkos::extents e, + Layout) +{ + using layout_type = std::remove_cvref_t; + if constexpr (std::is_same_v) { + for_each_in_extents_impl(std::forward(f), e, + reverse_index_sequence_t{}); + } + else { // layout_right or any other layout + for_each_in_extents_impl(std::forward(f), e, + reverse_index_sequence_t{}); + } +} + +#endif // defined(MDSPAN_EXAMPLE_CAN_USE_STD_RANGES) + +int main() { + +#if defined(MDSPAN_EXAMPLE_CAN_USE_STD_RANGES) + Kokkos::extents e; + auto printer = [] (int i, int j) { + std::cout << "(" << i << "," << j << ")\n"; + }; + std::cout << "layout_right:\n"; + for_each_in_extents(printer, e, Kokkos::layout_right{}); + std::cout << "\nlayout_left:\n"; + for_each_in_extents(printer, e, Kokkos::layout_left{}); +#endif // defined(MDSPAN_EXAMPLE_CAN_USE_STD_RANGES) + + return 0; +} diff --git a/examples/for_each_extents/for_each_extents_no_ranges.cpp b/examples/for_each_extents/for_each_extents_no_ranges.cpp new file mode 100755 index 00000000..e35f353d --- /dev/null +++ b/examples/for_each_extents/for_each_extents_no_ranges.cpp @@ -0,0 +1,366 @@ +#include +#include +#include +#include +#include + +// There's no separate feature test macro for the C++20 feature +// of lambdas with named template parameters (P0428R2). +#if __cplusplus >= 202002L +# define MDSPAN_EXAMPLE_CAN_USE_LAMBDA_TEMPLATE_PARAM_LIST 1 +#endif + +// This example doesn't currently work with Clang, +// because Clang doesn't like structured binding results +// being captured by inner lambdas. +#if ! defined(__clang__) && defined(MDSPAN_EXAMPLE_CAN_USE_LAMBDA_TEMPLATE_PARAM_LIST) + +////////////////////////////////////////////////////////////////////////// +// Part 1: Compile-time iteration +////////////////////////////////////////////////////////////////////////// + +// C++20 lets you write lambdas with explicitly named template +// parameters (vs. C++14 lambdas with "auto" parameters). If you have +// a lambda templated on that takes a +// std::index_sequence parameter, you can then call the +// lambda with the result of std::make_index_sequence. This calls +// the lambda with the template arguments 0, 1, 2, ..., N-1. You can +// then use these as "loop indices" to "iterate" at compile time over +// a parameter pack. +// +// If you don't have C++20, you can replace the lambda with a +// separate, named helper function. +// +// Another approach would be to write a reusable "template for each". +// This example doesn't do that, but it could be useful for +// backporting or for documenting intent. + +// Print all the elements of a parameter pack. +// +// This is a lambda and not a function because you can't +// straightforwardly use std::apply on templated nonmember functions +// (as it doesn't know which overload to use), but you can use it on +// generic lambdas. See the example here: +// +// https://en.cppreference.com/w/cpp/utility/apply +auto print_pack = [](InputTypes&& ... input) { + auto print_all = [&]( std::index_sequence ) { + auto print_one = [&] (std::size_t index, auto&& in) { + std::cout << in; + if(index + 1 < sizeof...(Indices)) { + std::cout << ", "; + } + }; + (print_one(Indices, input), ...); + }; + std::cout << '('; + print_all(std::make_index_sequence()); + std::cout << ")\n"; +}; + +////////////////////////////////////////////////////////////////////////// +// Part 2: Splitting extents +// +// extents is part of +// mdspan. It can mix run-time and compile-time extents values. +// +// We can express multidimensional iteration recursively +// by splitting an extents object into two parts (left and right), +// and iterating over one part while fixing the other. +////////////////////////////////////////////////////////////////////////// + +// Returns a new extents object representing +// all but the leftmost extent of e. +// +// extents -> extents +// extents -> extents +// +// This example shows that you can do +// index arithmetic on an index sequence. +template +auto right_extents( Kokkos::extents e ) +{ + static_assert(sizeof...(Extents) != 0); + return [&]( std::index_sequence ) { + return Kokkos::extents{ + e.extent(Indices + 1)... + }; + }( std::make_index_sequence() ); +} + +// Return two things: +// +// * the leftmost extent as an extents object, and +// * all the other (right) extents as a (single) extents object. +// +// Encoding the leftmost extent as an extents object +// lets us preserve its compile-time-ness. +// +// This needs to be a lambda or function object, +// not a templated function. +auto split_extents_at_leftmost = + [](Kokkos::extents e) +{ + static_assert(sizeof...(Extents) != 0); + Kokkos::extents left_ext( + e.extent(0)); + return std::tuple{left_ext, right_extents(e)}; +}; + +// right_extents can be implemented by overloading for +// extents. +// That approach doesn't work for left_extents. + +// Returns a new extents object representing +// all but the rightmost extent of e. +template +auto left_extents( Kokkos::extents e ) +{ + static_assert(sizeof...(Extents) != 0); + return [&]( std::index_sequence ) { + return Kokkos::extents{ + e.extent(Indices)... + }; + }( std::make_index_sequence() ); +} + +// This needs to be a lambda or function object, not a templated function. +auto split_extents_at_rightmost = + [](Kokkos::extents e) +{ + static_assert(sizeof...(Extents) != 0); + Kokkos::extents right_ext( + e.extent(e.rank() - 1)); + return std::tuple{left_extents(e), right_ext}; +}; + +////////////////////////////////////////////////////////////////////////// +// Part 3: Recursing on extents +////////////////////////////////////////////////////////////////////////// + +// This is a loop over one extent (dimension). +// By packaging up lambdas that fix other extents, +// we can use this as a building block +// for iterating over all the extents of a multidimensional array. +// +// This could also serve as a hook for passing along +// optimization information -- e.g., whether we want +// to apply "#pragma omp simd" to a particular extent. +template +void for_each_one_extent(Callable&& callable, Kokkos::extents ext) +{ + // If it's a run-time extent, do a run-time loop. + if constexpr(ext.static_extent(0) == Kokkos::dynamic_extent) { + for(IndexType index = 0; index < ext.extent(0); ++index) { + std::forward(callable)(index); + } + } + else { + // It's a compile-time extent, so use a fold expression + // to "iterate at compile time." + // This effectively unrolls the loop. + // + // Since we know the extent at compile time, + // we could also apply other optimizations here, + // like unrolling for specific SIMD widths. + [&] ( std::index_sequence ) { + (std::forward(callable)(Indices), ...); + }( std::make_index_sequence() ); + } +} + +// Call callable on each multidimensional index in the extents, +// iterating in row-major order. +template +void for_each_in_extents_row_major( + Callable&& callable, + Kokkos::extents ext) +{ + if constexpr(ext.rank() == 0) { + return; + } else if constexpr(ext.rank() == 1) { + for_each_one_extent(callable, ext); + } else { + auto [left_exts, right_exts] = split_extents_at_leftmost(ext); + auto inner = [&](auto... left_indices) { + auto next = [&] (auto... right_indices) { + // left_indices is really only one index here, + // but it's still a parameter pack. + // Writing the code this way suggests a more general approach. + std::forward(callable)(left_indices..., right_indices...); + }; + for_each_in_extents_row_major(next, right_exts); + }; + for_each_one_extent(inner, left_exts); + } +} + +// Call callable on each multidimensional index in the extents, +// iterating in column-major order. +// +// The implementation differs in only two places from the row-major version. +// This suggests a way to generalize. +// +// Overloading on extents +// works fine for the row major case, but not for the column major case. +template +void for_each_in_extents_col_major( + Callable&& callable, + Kokkos::extents ext) +{ + if constexpr(ext.rank() == 0) { + return; + } else if constexpr (ext.rank() == 1) { + for_each_one_extent(callable, ext); + } else { + // 1. Split rightmost instead of leftmost. + auto [left_exts, right_exts] = split_extents_at_rightmost(ext); + auto inner = [&](auto... right_indices) { + // 2. Put the left indices in the inner loop, + // instead of the right indices. + auto next = [&] (auto... left_indices) { + std::forward(callable)(left_indices..., right_indices...); + }; + for_each_in_extents_col_major(next, left_exts); + }; + for_each_in_extents_col_major(inner, right_exts); + } +} + +////////////////////////////////////////////////////////////////////////// +// Part 4: Generalize iteration order +////////////////////////////////////////////////////////////////////////// + +// We revise the above example +// by picking one iteration order as canonical +// (we've chosen row-major order above), +// and implementing other orders +// by changing the orders of extents and indices. + +template +void for_each_in_extents_impl(Callable&& callable, + Kokkos::extents ext, + ExtentsReorderer reorder_extents, + ExtentsSplitter split_extents, + IndicesReorderer reorder_indices) +{ + if constexpr(ext.rank() == 0) { + return; + } else if constexpr(ext.rank() == 1) { + for_each_one_extent(callable, ext); + } else { + // 1. Reorder the input extents. + auto reordered_extents = reorder_extents(ext); + + // 2. Split into "left" and "right." + // For row-major and column-major, the resulting left_exts + // should always have rank 1 (i.e., only contain one extent). + auto [left_exts, right_exts] = split_extents(reordered_extents); + + // 3. Create a lambda that loops over the right extents, + // and takes the left extent(s) as input. + auto inner = [&] (auto... left_indices) { + auto next = [&] (auto... right_indices) { + // 4. "Fix" the order of indices to match + // the above reordering of extents. + std::apply(std::forward(callable), + reorder_indices(left_indices..., right_indices...)); + }; + for_each_in_extents_impl(next, right_exts, reorder_extents, + split_extents, reorder_indices); + }; + + // 5. Take the above lambda and loop over the left extent(s). + for_each_in_extents_impl(inner, left_exts, reorder_extents, + split_extents, reorder_indices); + } +} + +auto extents_identity = []( + Kokkos::extents ext) +{ + return ext; +}; + +auto extents_reverse = []( + Kokkos::extents ext) +{ + constexpr std::size_t N = ext.rank(); + + return [&]( std::index_sequence ) { + return Kokkos::extents< + IndexType, + ext.static_extent(N - 1 - Indices)... + >{ ext.extent(N - 1 - Indices)... }; + }( std::make_index_sequence() ); +}; + +// Return a parameter pack as a tuple. +auto indices_identity = [](auto... indices) { + return std::tuple{indices...}; +}; + +// Get the n-th item in a parameter pack, +// where n is a compile-time value. +template +auto get_pack(Args... args) +{ + std::common_type_t result; + std::size_t i = 0; + return ((i++ == n ? (result = args, true) : false) || ...); +} + +// Return the reverse of a parameter pack as a std::tuple. +auto indices_reverse = [](auto... args) { + constexpr std::size_t N = sizeof...(args); + + return [&]( std::index_sequence ) { + return std::tuple{ get_pack(args)... }; + }( std::make_index_sequence() ); +}; + +// Row-major iteration +template +void for_each_in_extents(Callable&& callable, + Kokkos::extents ext, + Kokkos::layout_right) +{ + for_each_in_extents_impl(std::forward(callable), ext, + extents_identity, split_extents_at_leftmost, indices_identity); +} + +// Column-major iteration +template +void for_each_in_extents(Callable&& callable, + Kokkos::extents ext, + Kokkos::layout_left) +{ + for_each_in_extents_impl(std::forward(callable), ext, + extents_reverse, split_extents_at_rightmost, indices_reverse); +} + +#endif // ! defined(__clang__) && defined(MDSPAN_EXAMPLE_CAN_USE_LAMBDA_TEMPLATE_PARAM_LIST) + +int main() { + +#if ! defined(__clang__) && defined(MDSPAN_EXAMPLE_CAN_USE_LAMBDA_TEMPLATE_PARAM_LIST) + // The functions work for any combination + // of compile-time or run-time extents. + Kokkos::extents e{4}; + + std::cout << "\nRow major:\n"; + for_each_in_extents_row_major(print_pack, e); + + std::cout << "\nColumn major\n"; + for_each_in_extents_col_major(print_pack, e); + + std::cout << "\nfor_each_in_extents: row major:\n"; + for_each_in_extents(print_pack, e, Kokkos::layout_right{}); + + std::cout << "\nfor_each_in_extents: column major:\n"; + for_each_in_extents(print_pack, e, Kokkos::layout_left{}); +#endif // defined(MDSPAN_EXAMPLE_CAN_USE_LAMBDA_TEMPLATE_PARAM_LIST) + + return 0; +}