diff --git a/CMakeLists.txt b/CMakeLists.txt index 577099933a..6c1c2edbc1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 3.22) -project(DLAF VERSION 0.4.0) +project(DLAF VERSION 0.1.0) # --------------------------------------------------------------------------- # CMake configurations diff --git a/ci/common-ci.yml b/ci/common-ci.yml index 38c635c488..626f81edc2 100644 --- a/ci/common-ci.yml +++ b/ci/common-ci.yml @@ -34,7 +34,7 @@ stages: reports: dotenv: build.env variables: - SPACK_SHA: 4b0479159feed0d9bcd6f1d75b166a6ac67f9602 + SPACK_SHA: dcc4423a9d0219a26ceeb52e329ed956e41f4d2c SPACK_DLAF_REPO: ./spack DOCKER_BUILD_ARGS: '[ "BASE_IMAGE", diff --git a/include/dlaf/eigensolver/band_to_tridiag/mc.h b/include/dlaf/eigensolver/band_to_tridiag/mc.h index a2c55d940a..86146d88d7 100644 --- a/include/dlaf/eigensolver/band_to_tridiag/mc.h +++ b/include/dlaf/eigensolver/band_to_tridiag/mc.h @@ -10,6 +10,9 @@ #pragma once +#include +#include + #include #include @@ -29,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1034,6 +1038,19 @@ TridiagResult BandToTridiag::call_L( // Should be dispatched to local implementation if (1x1) grid. 
DLAF_ASSERT(grid.size() != comm::Size2D(1, 1), grid); +#ifdef DLAF_WITH_HDF5 + static std::atomic num_b2t_calls = 0; + std::stringstream fname; + fname << "band_to_tridiag-" << matrix::internal::TypeToString_v << "-" + << std::to_string(num_b2t_calls) << ".h5"; + std::optional file; + + if (getTuneParameters().debug_dump_band_to_tridiagonal_data) { + file = matrix::internal::FileHDF5(grid.fullCommunicator(), fname.str()); + file->write(mat_a, "/input"); + } +#endif + // note: A is square and has square blocksize SizeType size = mat_a.size().cols(); SizeType n = mat_a.nrTiles().cols(); @@ -1524,6 +1541,14 @@ TridiagResult BandToTridiag::call_L( mat_trid.readwrite(index))); } +#ifdef DLAF_WITH_HDF5 + if (getTuneParameters().debug_dump_band_to_tridiagonal_data) { + file->write(mat_trid, "/tridiagonal"); + } + + num_b2t_calls++; +#endif + return {std::move(mat_trid), std::move(mat_v)}; } } diff --git a/include/dlaf/eigensolver/eigensolver/impl.h b/include/dlaf/eigensolver/eigensolver/impl.h index f9ebc0c308..e39a246005 100644 --- a/include/dlaf/eigensolver/eigensolver/impl.h +++ b/include/dlaf/eigensolver/eigensolver/impl.h @@ -9,8 +9,10 @@ // #pragma once +#include #include #include +#include #include #include @@ -60,28 +62,35 @@ void Eigensolver::call(comm::CommunicatorGrid& grid, blas::Uplo uplo, M if (uplo != blas::Uplo::Lower) DLAF_UNIMPLEMENTED(uplo); - auto mat_taus = reduction_to_band(grid, mat_a, band_size); - auto ret = band_to_tridiagonal(grid, uplo, band_size, mat_a); - #ifdef DLAF_WITH_HDF5 + static std::atomic num_eigensolver_calls = 0; + std::stringstream fname; + fname << "eigensolver-" << matrix::internal::TypeToString_v << "-" + << std::to_string(num_eigensolver_calls) << ".h5"; std::optional file; - if (getTuneParameters().debug_dump_trisolver_data) { - file = matrix::internal::FileHDF5(grid.fullCommunicator(), "trid-ref.h5"); - file->write(ret.tridiagonal, "/tridiag"); + if (getTuneParameters().debug_dump_eigensolver_data) { + file = 
matrix::internal::FileHDF5(grid.fullCommunicator(), fname.str()); + file->write(mat_a, "/input"); } #endif + auto mat_taus = reduction_to_band(grid, mat_a, band_size); + + auto ret = band_to_tridiagonal(grid, uplo, band_size, mat_a); + tridiagonal_eigensolver(grid, ret.tridiagonal, evals, mat_e); + bt_band_to_tridiagonal(grid, band_size, mat_e, ret.hh_reflectors); + bt_reduction_to_band(grid, band_size, mat_e, mat_a, mat_taus); + #ifdef DLAF_WITH_HDF5 - if (getTuneParameters().debug_dump_trisolver_data) { + if (getTuneParameters().debug_dump_eigensolver_data) { file->write(evals, "/evals"); file->write(mat_e, "/evecs"); } -#endif - bt_band_to_tridiagonal(grid, band_size, mat_e, ret.hh_reflectors); - bt_reduction_to_band(grid, band_size, mat_e, mat_a, mat_taus); + num_eigensolver_calls++; +#endif } } diff --git a/include/dlaf/eigensolver/reduction_to_band/impl.h b/include/dlaf/eigensolver/reduction_to_band/impl.h index c61151102f..148078d422 100644 --- a/include/dlaf/eigensolver/reduction_to_band/impl.h +++ b/include/dlaf/eigensolver/reduction_to_band/impl.h @@ -9,8 +9,10 @@ // #pragma once +#include #include #include +#include #include #include @@ -41,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -1129,6 +1132,19 @@ Matrix ReductionToBand::call(comm::CommunicatorGrid& gr auto mpi_col_chain = grid.col_communicator_pipeline(); auto mpi_col_chain_panel = grid.col_communicator_pipeline(); +#ifdef DLAF_WITH_HDF5 + static std::atomic num_reduction_to_band_calls = 0; + std::stringstream fname; + fname << "reduction_to_band-" << matrix::internal::TypeToString_v << "-" + << std::to_string(num_reduction_to_band_calls) << ".h5"; + std::optional file; + + if (getTuneParameters().debug_dump_reduction_to_band_data) { + file = matrix::internal::FileHDF5(grid.fullCommunicator(), fname.str()); + file->write(mat_a, "/input"); + } +#endif + const auto& dist = mat_a.distribution(); const comm::Index2D rank = dist.rankIndex(); @@ -1144,8 +1160,17 @@ 
Matrix ReductionToBand::call(comm::CommunicatorGrid& gr comm::Index2D(mat_a.rankIndex().col(), 0), comm::Index2D(mat_a.sourceRankIndex().col(), 0))); - if (nrefls == 0) + if (nrefls == 0) { +#ifdef DLAF_WITH_HDF5 + if (getTuneParameters().debug_dump_reduction_to_band_data) { + file->write(mat_a, "/band"); + } + + num_reduction_to_band_calls++; +#endif + return mat_taus; + } Matrix mat_taus_retiled = mat_taus.retiledSubPipeline(LocalTileSize(mat_a.blockSize().cols() / band_size, 1)); @@ -1423,6 +1448,14 @@ Matrix ReductionToBand::call(comm::CommunicatorGrid& gr v.reset(); } +#ifdef DLAF_WITH_HDF5 + if (getTuneParameters().debug_dump_reduction_to_band_data) { + file->write(mat_a, "/band"); + } + + num_reduction_to_band_calls++; +#endif + return mat_taus; } } diff --git a/include/dlaf/eigensolver/tridiag_solver/impl.h b/include/dlaf/eigensolver/tridiag_solver/impl.h index fb6c584f4c..0e1f7c4c94 100644 --- a/include/dlaf/eigensolver/tridiag_solver/impl.h +++ b/include/dlaf/eigensolver/tridiag_solver/impl.h @@ -10,6 +10,8 @@ #pragma once #include +#include +#include #include #include @@ -27,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -363,6 +366,19 @@ void TridiagSolver::call(comm::CommunicatorGrid& grid, Matrix num_tridiag_solver_calls = 0; + std::stringstream fname; + fname << "tridiag_solver-" + << matrix::internal::TypeToString_v << std::to_string(num_tridiag_solver_calls) << ".h5"; + std::optional file; + + if (getTuneParameters().debug_dump_tridiag_solver_data) { + file = matrix::internal::FileHDF5(grid.fullCommunicator(), fname.str()); + file->write(tridiag, "/input"); + } +#endif + // If the matrix is composed by a single tile simply call stedc. 
if (evecs.nrTiles().linear_size() == 1) { if constexpr (D == Device::CPU) { @@ -441,6 +457,15 @@ void TridiagSolver::call(comm::CommunicatorGrid& grid, Matrix(row_task_chain, 0, n, ws_h.i1, ws_hm.e0, ws_hm.e2); copy(ws_hm.e2, evecs); + +#ifdef DLAF_WITH_HDF5 + if (getTuneParameters().debug_dump_tridiag_solver_data) { + file->write(evecs, "/evecs"); + file->write(evals, "/evals"); + } + + num_tridiag_solver_calls++; +#endif } // \overload TridiagSolver::call() diff --git a/include/dlaf/matrix/hdf5.h b/include/dlaf/matrix/hdf5.h index 50e8b2d334..30be778aec 100644 --- a/include/dlaf/matrix/hdf5.h +++ b/include/dlaf/matrix/hdf5.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -53,6 +55,35 @@ const H5::PredType& hdf5_datatype>::type = hdf5_datatype::typ template struct hdf5_datatype : public hdf5_datatype {}; +// Type to string mappings +template +struct TypeToString { + static inline constexpr std::string_view value = typeid(T).name(); +}; + +template +inline constexpr std::string_view TypeToString_v = TypeToString::value; + +template <> +struct TypeToString { + static inline constexpr std::string_view value = "s"; +}; + +template <> +struct TypeToString { + static inline constexpr std::string_view value = "d"; +}; + +template <> +struct TypeToString> { + static inline constexpr std::string_view value = "c"; +}; + +template <> +struct TypeToString> { + static inline constexpr std::string_view value = "z"; +}; + // Helper function that for each local tile index in @p dist, gets a sender of a tile with // @p get_tile and sends it to a function that takes care of the mapping between file and memory. // Then, this function, passes all required arguments to @p dataset_op which should be either diff --git a/include/dlaf/tune.h b/include/dlaf/tune.h index 7de7f2aa2c..f5450d7cfb 100644 --- a/include/dlaf/tune.h +++ b/include/dlaf/tune.h @@ -22,11 +22,23 @@ namespace dlaf { /// DLA-Future tuning parameters. 
/// /// Holds the value of the parameters that can be used to tune DLA-Future. -/// - debug_dump_trisolver_data: -/// Enable dump of trisolver input/output data to "trid-ref.h5" file that will be created in the +/// - debug_dump_eigensolver_data: +/// Enable dump of eigensolver input/output data to "eigensolver-<type>-<call_index>.h5" file that will be created in the /// working folder (it should not exist before the execution). -/// WARNING: just a single execution can be dumped on disk, and any subsequent call fails. -/// Set with environment variable DLAF_DEBUG_DUMP_TRISOLVER_DATA. +/// Set with environment variable DLAF_DEBUG_DUMP_EIGENSOLVER_DATA. +/// - debug_dump_reduction_to_band_data: +/// Enable dump of reduction_to_band input/output data to "reduction_to_band-<type>-<call_index>.h5" file that will be +/// created in the working folder (it should not exist before the execution). +/// Set with environment variable +/// DLAF_DEBUG_DUMP_REDUCTION_TO_BAND_DATA. +/// - debug_dump_band_to_tridiagonal_data: +/// Enable dump of band_to_tridiagonal input/output data to "band_to_tridiag-<type>-<call_index>.h5" file that will +/// be created in the working folder (it should not exist before the execution). +/// Set with environment variable DLAF_DEBUG_DUMP_BAND_TO_TRIDIAGONAL_DATA. +/// - debug_dump_tridiag_solver_data: +/// Enable dump of tridiagonal solver input/output data to "tridiag_solver-<type><call_index>.h5" file that will be +/// created in the working folder (it should not exist before the execution). +/// Set with environment variable DLAF_DEBUG_DUMP_TRIDIAG_SOLVER_DATA. /// - red2band_panel_nworkers: /// The maximum number of threads to use for computing the panel in the reduction to band algorithm. /// Set with --dlaf:red2band-panel-nworkers or env variable DLAF_RED2BAND_PANEL_NWORKERS. 
@@ -81,7 +93,10 @@ struct TuneParameters { red2band_panel_nworkers = std::max(1, default_pool_thread_count / 2); tridiag_rank1_nworkers = default_pool_thread_count; } - bool debug_dump_trisolver_data = false; + bool debug_dump_eigensolver_data = false; + bool debug_dump_reduction_to_band_data = false; + bool debug_dump_band_to_tridiagonal_data = false; + bool debug_dump_tridiag_solver_data = false; std::size_t red2band_panel_nworkers = 1; std::size_t red2band_barrier_busy_wait_us = 1000; std::size_t tridiag_rank1_nworkers = 1; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cdc1f1fd0c..986fde4ec1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -353,11 +353,6 @@ configure_package_config_file( INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME} ) -write_basic_package_version_file( - DLAFConfigVersion.cmake VERSION ${PACKAGE_VERSION} COMPATIBILITY AnyNewerVersion -) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/DLAFConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/DLAFConfigVersion.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME} ) diff --git a/src/init.cpp b/src/init.cpp index 3fdba264b5..2eb38d7f38 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -221,7 +221,13 @@ void updateConfiguration(const pika::program_options::variables_map& vm, configu updateConfigurationValue(vm, param.band_to_tridiag_1d_block_size_base, "BAND_TO_TRIDIAG_1D_BLOCK_SIZE_BASE", "band-to-tridiag-1d-block-size-base"); - updateConfigurationValue(vm, param.debug_dump_trisolver_data, "DEBUG_DUMP_TRISOLVER_DATA", ""); + updateConfigurationValue(vm, param.debug_dump_eigensolver_data, "DEBUG_DUMP_EIGENSOLVER_DATA", ""); + updateConfigurationValue(vm, param.debug_dump_reduction_to_band_data, + "DEBUG_DUMP_REDUCTION_TO_BAND_DATA", ""); + updateConfigurationValue(vm, param.debug_dump_band_to_tridiagonal_data, + "DEBUG_DUMP_BAND_TO_TRIDIAGONAL_DATA", ""); + updateConfigurationValue(vm, param.debug_dump_tridiag_solver_data, 
"DEBUG_DUMP_TRIDIAG_SOLVER_DATA", + ""); updateConfigurationValue(vm, param.tridiag_rank1_nworkers, "TRIDIAG_RANK1_NWORKERS", "tridiag-rank1-nworkers");