diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 9069c299e8..931b2d09c6 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -814,7 +814,6 @@ WARN_LOGFILE = # Note: If this tag is empty the current directory is searched. INPUT = ${DLAF_SOURCE_DIR}/doc/mainpage.dox INPUT += ${DLAF_SOURCE_DIR}/include -INPUT += ${DLAF_SOURCE_DIR}/src # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -2149,6 +2148,7 @@ PREDEFINED = DLAF_DOXYGEN PREDEFINED += DLAF_ASSERT_ENABLE PREDEFINED += DLAF_ASSERT_MODERATE_ENABLE PREDEFINED += DLAF_ASSERT_HEAVY_ENABLE +PREDEFINED += DLAF_WITH_GPU # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/include/dlaf/auxiliary.h b/include/dlaf/auxiliary.h index b711c7a99c..bec74a7f29 100644 --- a/include/dlaf/auxiliary.h +++ b/include/dlaf/auxiliary.h @@ -9,4 +9,7 @@ // #pragma once +/// @file +/// Includes all auxiliary algorithms such as norms. + #include "dlaf/auxiliary/norm.h" diff --git a/include/dlaf/auxiliary/norm.h b/include/dlaf/auxiliary/norm.h index 91f6848f8d..0d0e96baae 100644 --- a/include/dlaf/auxiliary/norm.h +++ b/include/dlaf/auxiliary/norm.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include "dlaf/auxiliary/norm/api.h" #include "dlaf/blas/enum_output.h" #include "dlaf/common/assert.h" diff --git a/include/dlaf/auxiliary/norm/api.h b/include/dlaf/auxiliary/norm/api.h index 4f020fc068..21393c81f0 100644 --- a/include/dlaf/auxiliary/norm/api.h +++ b/include/dlaf/auxiliary/norm/api.h @@ -27,7 +27,7 @@ struct Norm { Matrix& matrix); }; -/// ---- ETI +// ETI #define DLAF_NORM_ETI(KWORD, DATATYPE) KWORD template struct Norm; DLAF_NORM_ETI(extern, float) diff --git a/include/dlaf/blas/enum_output.h b/include/dlaf/blas/enum_output.h index 63820d8679..7adc873019 100644 --- a/include/dlaf/blas/enum_output.h +++ b/include/dlaf/blas/enum_output.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include diff --git a/include/dlaf/blas/scal.h b/include/dlaf/blas/scal.h index da0ddbbfc3..028480482a 100644 --- a/include/dlaf/blas/scal.h +++ b/include/dlaf/blas/scal.h @@ -13,13 +13,12 @@ #include /// @file - -namespace blas { - -/// Provides overloads for mixed real complex variants missing in blaspp. +/// Provides overloads for mixed real complex variants of scal missing in blaspp. /// - csscal /// - zdscal +namespace blas { + void scal(std::int64_t n, float a, std::complex* x, std::int64_t incx) noexcept; void scal(std::int64_t n, double a, std::complex* x, std::int64_t incx) noexcept; diff --git a/include/dlaf/blas/tile.h b/include/dlaf/blas/tile.h index 0f36218da8..1d5dafce43 100644 --- a/include/dlaf/blas/tile.h +++ b/include/dlaf/blas/tile.h @@ -9,6 +9,9 @@ // #pragma once +/// @file +/// Provides `Tile` wrappers for BLAS operations. + #include #include @@ -169,16 +172,12 @@ template void gemm(const blas::Op op_a, const blas::Op op_b, const T alpha, const Tile& a, const Tile& b, const T beta, const Tile& c); -/// \overload gemm -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto gemm(const dlaf::internal::Policy& p, Sender&& s); -/// \overload gemm -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -191,16 +190,12 @@ template void hemm(const blas::Side side, const blas::Uplo uplo, const T alpha, const Tile& a, const Tile& b, const T beta, const Tile& c); -/// \overload hemm -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto hemm(const dlaf::internal::Policy& p, Sender&& s); -/// \overload hemm -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -213,16 +208,12 @@ template void her2k(const blas::Uplo uplo, const blas::Op op, const T alpha, const Tile& a, const Tile& b, const BaseType beta, const Tile& c); -/// \overload her2k -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto her2k(const dlaf::internal::Policy& p, Sender&& s); -/// \overload her2k -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -235,16 +226,12 @@ template void herk(const blas::Uplo uplo, const blas::Op op, const BaseType alpha, const Tile& a, const BaseType beta, const Tile& c); -/// \overload herk -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto herk(const dlaf::internal::Policy& p, Sender&& s); -/// \overload herk -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -258,16 +245,12 @@ void trmm(const dlaf::internal::Policy& policy, const blas::Side side, const const blas::Op op, const blas::Diag diag, const T alpha, const Tile& a, const Tile& b); -/// \overload trmm -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto trmm(const dlaf::internal::Policy& p, Sender&& s); -/// \overload trmm -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -282,16 +265,12 @@ void trmm3(const dlaf::internal::Policy& policy, const blas::Side side, const const blas::Op op, const blas::Diag diag, const T alpha, const Tile& a, const Tile& b, const Tile& c); -/// \overload trmm3 -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto trmm3(const dlaf::internal::Policy& p, Sender&& s); -/// \overload trmm3 -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template @@ -305,16 +284,12 @@ void trsm(const dlaf::internal::Policy& policy, const blas::Side side, const const blas::Op op, const blas::Diag diag, const T alpha, const Tile& a, const Tile& b); -/// \overload trsm -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto trsm(const dlaf::internal::Policy& p, Sender&& s); -/// \overload trsm -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template diff --git a/include/dlaf/blas/tile_extensions.h b/include/dlaf/blas/tile_extensions.h index d5d92caaf7..ac8666b608 100644 --- a/include/dlaf/blas/tile_extensions.h +++ b/include/dlaf/blas/tile_extensions.h @@ -9,6 +9,9 @@ // #pragma once +/// @file +/// Provides `Tile` wrappers for extra basic linear algebra operations not covered by BLAS. + #include #include "dlaf/blas/tile.h" @@ -39,16 +42,12 @@ using matrix::Tile; template void add(T alpha, const matrix::Tile& tile_b, const matrix::Tile& tile_a); -/// \overload add -/// /// This overload takes a policy argument and a sender which must send all required arguments for the /// algorithm. Returns a sender which signals a connected receiver when the algorithm is done. template >> auto add(const dlaf::internal::Policy& p, Sender&& s); -/// \overload add -/// /// This overload partially applies the algorithm with a policy for later use with operator| with a /// sender on the left-hand side. template diff --git a/include/dlaf/common/assert.h b/include/dlaf/common/assert.h index 301db28b5d..bf25b5e678 100644 --- a/include/dlaf/common/assert.h +++ b/include/dlaf/common/assert.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include #include @@ -130,7 +128,7 @@ void silenceUnusedWarningFor(Args&&...) {} #define DLAF_STATIC_FAIL(DummyType, Msg) static_assert(sizeof(DummyType) == 0, Msg) -/// Returns a fake object of type T (specified by __VA_ARGS__) dereferencing a null pointer. +/// Returns a fake object of type T (specified by @p __VA_ARGS__ ) dereferencing a null pointer. /// /// At runtime the program is exited with an error before dereferencing the null pointer. /// This macro is useful when a return statement is needed in unreachable branches, diff --git a/include/dlaf/common/callable_object.h b/include/dlaf/common/callable_object.h index aa8cb8a238..22b310b0c7 100644 --- a/include/dlaf/common/callable_object.h +++ b/include/dlaf/common/callable_object.h @@ -10,6 +10,8 @@ #pragma once +/// @file + /// Given a function name @fname generates a constexpr object with name fname_o /// with type fname_t. The generated type has one static operator() which /// transparently forwards all arguments to a call to fname. This macro is diff --git a/include/dlaf/common/data.h b/include/dlaf/common/data.h index e2dc27b387..08c6ea6091 100644 --- a/include/dlaf/common/data.h +++ b/include/dlaf/common/data.h @@ -23,7 +23,15 @@ namespace dlaf { namespace common { +#ifdef DLAF_DOXYGEN + /// Traits for verifying if the given type is an implementation of the Data concept. +/// +/// Derive from @p std::true_type or @p std::false_type. +template +struct is_data {}; + +#else template struct is_data : std::false_type {}; @@ -37,6 +45,7 @@ struct is_data< std::is_same_v())), SizeType> && std::is_same_v())), bool>>> : std::true_type {}; +#endif template inline constexpr bool is_data_v = is_data::value; diff --git a/include/dlaf/common/data_descriptor.h b/include/dlaf/common/data_descriptor.h index 28a5c1716b..bea6a53b8d 100644 --- a/include/dlaf/common/data_descriptor.h +++ b/include/dlaf/common/data_descriptor.h @@ -138,7 +138,7 @@ struct DataDescriptor { SizeType stride_; }; -/// Helper class for creatig a DataDescriptor from a bounded C-array. +/// Helper class for creating a DataDescriptor from a bounded C-array. template struct DataDescriptor : DataDescriptor { /// Create a Data from a given bounded C-array. diff --git a/include/dlaf/common/index2d.h b/include/dlaf/common/index2d.h index 28e2238a44..39df3017ea 100644 --- a/include/dlaf/common/index2d.h +++ b/include/dlaf/common/index2d.h @@ -26,7 +26,7 @@ namespace dlaf { enum class Coord { Row, Col }; -// Given a Coord, returns its orthogonal +/// Given a Coord, returns its orthogonal constexpr Coord orthogonal(const Coord rc) { return rc == Coord::Row ? Coord::Col : Coord::Row; } diff --git a/include/dlaf/common/pipeline.h b/include/dlaf/common/pipeline.h index 2b8aa8f300..ccaa12ef2c 100644 --- a/include/dlaf/common/pipeline.h +++ b/include/dlaf/common/pipeline.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include #include diff --git a/include/dlaf/common/round_robin.h b/include/dlaf/common/round_robin.h index 309742153b..c4ed913aa3 100644 --- a/include/dlaf/common/round_robin.h +++ b/include/dlaf/common/round_robin.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include namespace dlaf { diff --git a/include/dlaf/common/single_threaded_blas.h b/include/dlaf/common/single_threaded_blas.h index 452f14b0a0..1d925d5b2d 100644 --- a/include/dlaf/common/single_threaded_blas.h +++ b/include/dlaf/common/single_threaded_blas.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #ifdef DLAF_ASSERT_MODERATE_ENABLE #include #endif diff --git a/include/dlaf/common/timer.h b/include/dlaf/common/timer.h index 693574a259..d38aeaa6fb 100644 --- a/include/dlaf/common/timer.h +++ b/include/dlaf/common/timer.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include namespace dlaf { diff --git a/include/dlaf/common/unwrap.h b/include/dlaf/common/unwrap.h index 8992660288..f427491212 100644 --- a/include/dlaf/common/unwrap.h +++ b/include/dlaf/common/unwrap.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include diff --git a/include/dlaf/common/vector.h b/include/dlaf/common/vector.h index b0bad77175..7174148179 100644 --- a/include/dlaf/common/vector.h +++ b/include/dlaf/common/vector.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/types.h" diff --git a/include/dlaf/communication/communicator.h b/include/dlaf/communication/communicator.h index 948c70e877..2a688630ab 100644 --- a/include/dlaf/communication/communicator.h +++ b/include/dlaf/communication/communicator.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/communication/error.h" diff --git a/include/dlaf/communication/datatypes.h b/include/dlaf/communication/datatypes.h index c5193b1f03..360ec0b851 100644 --- a/include/dlaf/communication/datatypes.h +++ b/include/dlaf/communication/datatypes.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/communication/error.h" #include diff --git a/include/dlaf/communication/functions_sync.h b/include/dlaf/communication/functions_sync.h index ff0bebdc6e..0881962588 100644 --- a/include/dlaf/communication/functions_sync.h +++ b/include/dlaf/communication/functions_sync.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/communication/sync/all_reduce.h" #include "dlaf/communication/sync/basic.h" #include "dlaf/communication/sync/broadcast.h" diff --git a/include/dlaf/communication/init.h b/include/dlaf/communication/init.h index e72881fb36..772bc2d56c 100644 --- a/include/dlaf/communication/init.h +++ b/include/dlaf/communication/init.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include diff --git a/include/dlaf/communication/kernels.h b/include/dlaf/communication/kernels.h index f39cfc28d5..062a2f13a1 100644 --- a/include/dlaf/communication/kernels.h +++ b/include/dlaf/communication/kernels.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/communication/kernels/all_reduce.h" #include "dlaf/communication/kernels/broadcast.h" #include "dlaf/communication/kernels/p2p.h" diff --git a/include/dlaf/communication/type_handler.h b/include/dlaf/communication/type_handler.h index d4e8f3e6d7..81853c46e0 100644 --- a/include/dlaf/communication/type_handler.h +++ b/include/dlaf/communication/type_handler.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/communication/datatypes.h" diff --git a/include/dlaf/eigensolver.h b/include/dlaf/eigensolver.h index 6cffb134ed..48cec0e6fd 100644 --- a/include/dlaf/eigensolver.h +++ b/include/dlaf/eigensolver.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include "dlaf/eigensolver/band_to_tridiag.h" #include "dlaf/eigensolver/bt_band_to_tridiag.h" #include "dlaf/eigensolver/bt_reduction_to_band.h" diff --git a/include/dlaf/eigensolver/band_to_tridiag.h b/include/dlaf/eigensolver/band_to_tridiag.h index a31d03fbc0..b64e2106c0 100644 --- a/include/dlaf/eigensolver/band_to_tridiag.h +++ b/include/dlaf/eigensolver/band_to_tridiag.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/common/assert.h" #include "dlaf/communication/communicator_grid.h" diff --git a/include/dlaf/eigensolver/band_to_tridiag/api.h b/include/dlaf/eigensolver/band_to_tridiag/api.h index f5964fde1d..3450f01977 100644 --- a/include/dlaf/eigensolver/band_to_tridiag/api.h +++ b/include/dlaf/eigensolver/band_to_tridiag/api.h @@ -45,7 +45,7 @@ struct BandToTridiag { Matrix& mat_a) noexcept; }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_B2T_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct BandToTridiag; #define DLAF_EIGENSOLVER_B2T_DISTR_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ diff --git a/include/dlaf/eigensolver/band_to_tridiag/mc.h b/include/dlaf/eigensolver/band_to_tridiag/mc.h index 4a5a23221b..ee0a0c61e3 100644 --- a/include/dlaf/eigensolver/band_to_tridiag/mc.h +++ b/include/dlaf/eigensolver/band_to_tridiag/mc.h @@ -28,7 +28,7 @@ #include "dlaf/communication/communicator_grid.h" #include "dlaf/communication/kernels.h" #include "dlaf/eigensolver/band_to_tridiag/api.h" -#include "dlaf/eigensolver/get_1d_block_size.h" +#include "dlaf/eigensolver/internal/get_1d_block_size.h" #include "dlaf/lapack/gpu/lacpy.h" #include "dlaf/lapack/gpu/laset.h" #include "dlaf/lapack/tile.h" diff --git a/include/dlaf/eigensolver/bt_band_to_tridiag.h b/include/dlaf/eigensolver/bt_band_to_tridiag.h index 0f48cafe48..bf6cf3780e 100644 --- a/include/dlaf/eigensolver/bt_band_to_tridiag.h +++ b/include/dlaf/eigensolver/bt_band_to_tridiag.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/eigensolver/bt_band_to_tridiag/api.h" #include "dlaf/matrix/matrix.h" #include "dlaf/types.h" diff --git a/include/dlaf/eigensolver/bt_band_to_tridiag/api.h b/include/dlaf/eigensolver/bt_band_to_tridiag/api.h index 31a8dcab62..05bf10fad7 100644 --- a/include/dlaf/eigensolver/bt_band_to_tridiag/api.h +++ b/include/dlaf/eigensolver/bt_band_to_tridiag/api.h @@ -23,7 +23,7 @@ struct BackTransformationT2B { Matrix& mat_hh); }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_BT_BAND_TO_TRIDIAGONAL_ETI(KWORD, BACKEND, DEVICE, T) \ KWORD template struct BackTransformationT2B; diff --git a/include/dlaf/eigensolver/bt_reduction_to_band.h b/include/dlaf/eigensolver/bt_reduction_to_band.h index 28d82ab31b..7740754629 100644 --- a/include/dlaf/eigensolver/bt_reduction_to_band.h +++ b/include/dlaf/eigensolver/bt_reduction_to_band.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" diff --git a/include/dlaf/eigensolver/bt_reduction_to_band/api.h b/include/dlaf/eigensolver/bt_reduction_to_band/api.h index d82c33e2b3..1101bfea91 100644 --- a/include/dlaf/eigensolver/bt_reduction_to_band/api.h +++ b/include/dlaf/eigensolver/bt_reduction_to_band/api.h @@ -25,7 +25,7 @@ struct BackTransformationReductionToBand { common::internal::vector>> taus); }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_BT_REDUCTION_TO_BAND_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct BackTransformationReductionToBand; diff --git a/include/dlaf/eigensolver/eigensolver.h b/include/dlaf/eigensolver/eigensolver.h index e86868333d..5446fb7ae5 100644 --- a/include/dlaf/eigensolver/eigensolver.h +++ b/include/dlaf/eigensolver/eigensolver.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" diff --git a/include/dlaf/eigensolver/eigensolver/api.h b/include/dlaf/eigensolver/eigensolver/api.h index 9fb039cf3e..28effdf1c6 100644 --- a/include/dlaf/eigensolver/eigensolver/api.h +++ b/include/dlaf/eigensolver/eigensolver/api.h @@ -34,7 +34,7 @@ struct Eigensolver { Matrix, D>& evals, Matrix& mat_e); }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Eigensolver; diff --git a/include/dlaf/eigensolver/eigensolver/impl.h b/include/dlaf/eigensolver/eigensolver/impl.h index 91afb7da92..65e180e9bb 100644 --- a/include/dlaf/eigensolver/eigensolver/impl.h +++ b/include/dlaf/eigensolver/eigensolver/impl.h @@ -20,7 +20,7 @@ #include "dlaf/eigensolver/band_to_tridiag.h" #include "dlaf/eigensolver/bt_band_to_tridiag.h" #include "dlaf/eigensolver/bt_reduction_to_band.h" -#include "dlaf/eigensolver/get_band_size.h" +#include "dlaf/eigensolver/internal/get_band_size.h" #include "dlaf/eigensolver/reduction_to_band.h" #include "dlaf/eigensolver/tridiag_solver.h" #include "dlaf/lapack/tile.h" diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index b9d7480b6a..94087ec8b5 100644 --- a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/eigensolver/gen_eigensolver/api.h" #include "dlaf/matrix/matrix.h" @@ -25,8 +27,8 @@ namespace dlaf::eigensolver { /// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, /// including the diagonal, is destroyed. /// - @p mat_b contains the Cholesky decomposition of B -/// - @eigenvalues contains all the eigenvalues lambda -/// - @eigenvectors contains all the eigenvectors x +/// - @p eigenvalues contains all the eigenvalues lambda +/// - @p eigenvectors contains all the eigenvectors x /// /// Implementation on local memory. /// @@ -104,8 +106,8 @@ EigensolverResult genEigensolver(blas::Uplo uplo, Matrix& mat_a, Mat /// - the lower triangle or the upper triangle (depending on @p uplo) of @p mat_a, /// including the diagonal, is destroyed. /// - @p mat_b contains the Cholesky decomposition of B -/// - @eigenvalues contains all the eigenvalues lambda -/// - @eigenvectors contains all the eigenvectors x +/// - @p eigenvalues contains all the eigenvalues lambda +/// - @p eigenvectors contains all the eigenvectors x /// /// Implementation on distributed memory. /// diff --git a/include/dlaf/eigensolver/gen_eigensolver/api.h b/include/dlaf/eigensolver/gen_eigensolver/api.h index c44c55c1d6..34f9b485be 100644 --- a/include/dlaf/eigensolver/gen_eigensolver/api.h +++ b/include/dlaf/eigensolver/gen_eigensolver/api.h @@ -25,7 +25,7 @@ struct GenEigensolver { Matrix& eigenvectors); }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_GEN_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct GenEigensolver; diff --git a/include/dlaf/eigensolver/gen_to_std.h b/include/dlaf/eigensolver/gen_to_std.h index 2939dbde59..09e8b2c54a 100644 --- a/include/dlaf/eigensolver/gen_to_std.h +++ b/include/dlaf/eigensolver/gen_to_std.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" #include "dlaf/eigensolver/gen_to_std/api.h" diff --git a/include/dlaf/eigensolver/gen_to_std/api.h b/include/dlaf/eigensolver/gen_to_std/api.h index 49257060dd..1d766bd851 100644 --- a/include/dlaf/eigensolver/gen_to_std/api.h +++ b/include/dlaf/eigensolver/gen_to_std/api.h @@ -24,7 +24,7 @@ struct GenToStd { static void call_U(comm::CommunicatorGrid grid, Matrix& mat_a, Matrix& mat_u); }; -/// ---- ETI +// ETI #define DLAF_GENTOSTD_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct GenToStd; diff --git a/include/dlaf/eigensolver/get_1d_block_size.h b/include/dlaf/eigensolver/internal/get_1d_block_size.h similarity index 100% rename from include/dlaf/eigensolver/get_1d_block_size.h rename to include/dlaf/eigensolver/internal/get_1d_block_size.h diff --git a/include/dlaf/eigensolver/get_band_size.h b/include/dlaf/eigensolver/internal/get_band_size.h similarity index 100% rename from include/dlaf/eigensolver/get_band_size.h rename to include/dlaf/eigensolver/internal/get_band_size.h diff --git a/include/dlaf/eigensolver/get_red2band_panel_nworkers.h b/include/dlaf/eigensolver/internal/get_red2band_panel_nworkers.h similarity index 100% rename from include/dlaf/eigensolver/get_red2band_panel_nworkers.h rename to include/dlaf/eigensolver/internal/get_red2band_panel_nworkers.h diff --git a/include/dlaf/eigensolver/get_tridiag_rank1_nworkers.h b/include/dlaf/eigensolver/internal/get_tridiag_rank1_nworkers.h similarity index 100% rename from include/dlaf/eigensolver/get_tridiag_rank1_nworkers.h rename to include/dlaf/eigensolver/internal/get_tridiag_rank1_nworkers.h diff --git a/include/dlaf/eigensolver/reduction_to_band.h b/include/dlaf/eigensolver/reduction_to_band.h index 24c34373ef..e32582e517 100644 --- a/include/dlaf/eigensolver/reduction_to_band.h +++ b/include/dlaf/eigensolver/reduction_to_band.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/common/vector.h" #include "dlaf/communication/communicator_grid.h" #include "dlaf/matrix/matrix.h" @@ -107,12 +109,15 @@ common::internal::vector>> reduc /// /// In particular, @p mat_a will look like this (tile representation) if band_size == blocksize /// -/// B ~ ~ ~ ~ ~ -/// * B ~ ~ ~ ~ -/// v * B ~ ~ ~ -/// v v * B ~ ~ -/// v v v * B ~ -/// v v v v * B +/** @verbatim +B ~ ~ ~ ~ ~ +* B ~ ~ ~ ~ +v * B ~ ~ ~ +v v * B ~ ~ +v v v * B ~ +v v v v * B +@endverbatim +*/ /// /// where each column of `v` is an elementary reflector without its first element (which is always equal /// to 1), `B` are the tiles containg the band-diagonal form, while `*` tiles contain both elements @@ -120,14 +125,15 @@ common::internal::vector>> reduc /// diagonal excluded). /// /// In case band_size < blocksize: -/// -/// * ~ ~ ~ ~ ~ -/// * * ~ ~ ~ ~ -/// v * * ~ ~ ~ -/// v v * * ~ ~ -/// v v v * * ~ -/// v v v v * * -/// +/** @verbatim +* ~ ~ ~ ~ ~ +* * ~ ~ ~ ~ +v * * ~ ~ ~ +v v * * ~ ~ +v v v * * ~ +v v v v * * +@endverbatim +*/ /// @param grid is the CommunicatorGrid on which @p mat_a is distributed /// @param mat_a on entry it contains an Hermitian matrix, on exit it is overwritten with the /// band-diagonal result together with the elementary reflectors as described above. Just the tiles of diff --git a/include/dlaf/eigensolver/reduction_to_band/api.h b/include/dlaf/eigensolver/reduction_to_band/api.h index 5e53dcf564..0b6ea8588c 100644 --- a/include/dlaf/eigensolver/reduction_to_band/api.h +++ b/include/dlaf/eigensolver/reduction_to_band/api.h @@ -26,7 +26,7 @@ struct ReductionToBand { comm::CommunicatorGrid grid, Matrix& mat_a, const SizeType band_size); }; -/// ---- ETI +// ETI #define DLAF_EIGENSOLVER_REDUCTION_TO_BAND_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct ReductionToBand; diff --git a/include/dlaf/eigensolver/reduction_to_band/impl.h b/include/dlaf/eigensolver/reduction_to_band/impl.h index 805c770fc4..f2ec116445 100644 --- a/include/dlaf/eigensolver/reduction_to_band/impl.h +++ b/include/dlaf/eigensolver/reduction_to_band/impl.h @@ -32,7 +32,7 @@ #include "dlaf/communication/kernels/all_reduce.h" #include "dlaf/communication/kernels/reduce.h" #include "dlaf/communication/rdma.h" -#include "dlaf/eigensolver/get_red2band_panel_nworkers.h" +#include "dlaf/eigensolver/internal/get_red2band_panel_nworkers.h" #include "dlaf/lapack/tile.h" #include "dlaf/matrix/copy_tile.h" #include "dlaf/matrix/distribution.h" diff --git a/include/dlaf/eigensolver/tridiag_solver.h b/include/dlaf/eigensolver/tridiag_solver.h index c812869cb9..a6b39d9af7 100644 --- a/include/dlaf/eigensolver/tridiag_solver.h +++ b/include/dlaf/eigensolver/tridiag_solver.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include "dlaf/common/assert.h" #include "dlaf/communication/communicator_grid.h" #include "dlaf/eigensolver/tridiag_solver/api.h" diff --git a/include/dlaf/eigensolver/tridiag_solver/api.h b/include/dlaf/eigensolver/tridiag_solver/api.h index b122b1767c..3066dc8c4b 100644 --- a/include/dlaf/eigensolver/tridiag_solver/api.h +++ b/include/dlaf/eigensolver/tridiag_solver/api.h @@ -25,7 +25,7 @@ struct TridiagSolver { Matrix& evals, Matrix, device>& evecs); }; -/// ---- ETI +// ETI #define DLAF_TRIDIAGONAL_EIGENSOLVER_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct TridiagSolver; diff --git a/include/dlaf/eigensolver/tridiag_solver/merge.h b/include/dlaf/eigensolver/tridiag_solver/merge.h index bf4d5169bf..0e19d76985 100644 --- a/include/dlaf/eigensolver/tridiag_solver/merge.h +++ b/include/dlaf/eigensolver/tridiag_solver/merge.h @@ -17,7 +17,7 @@ #include "dlaf/common/range2d.h" #include "dlaf/common/single_threaded_blas.h" #include "dlaf/communication/kernels.h" -#include "dlaf/eigensolver/get_tridiag_rank1_nworkers.h" +#include "dlaf/eigensolver/internal/get_tridiag_rank1_nworkers.h" #include "dlaf/eigensolver/tridiag_solver/coltype.h" #include "dlaf/eigensolver/tridiag_solver/index_manipulation.h" #include "dlaf/eigensolver/tridiag_solver/kernels.h" diff --git a/include/dlaf/factorization.h b/include/dlaf/factorization.h index 9c71c30daa..ae53af7ced 100644 --- a/include/dlaf/factorization.h +++ b/include/dlaf/factorization.h @@ -9,5 +9,7 @@ // #pragma once +/// @file + #include "dlaf/factorization/cholesky.h" #include "dlaf/factorization/qr.h" diff --git a/include/dlaf/factorization/cholesky.h b/include/dlaf/factorization/cholesky.h index 79157b62a5..fa6f397b57 100644 --- a/include/dlaf/factorization/cholesky.h +++ b/include/dlaf/factorization/cholesky.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" #include "dlaf/factorization/cholesky/api.h" diff --git a/include/dlaf/factorization/cholesky/api.h b/include/dlaf/factorization/cholesky/api.h index 8916051833..3ea92edcac 100644 --- a/include/dlaf/factorization/cholesky/api.h +++ b/include/dlaf/factorization/cholesky/api.h @@ -23,7 +23,7 @@ struct Cholesky { static void call_U(comm::CommunicatorGrid grid, Matrix& mat_a); }; -/// ---- ETI +// ETI #define DLAF_FACTORIZATION_CHOLESKY_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Cholesky; diff --git a/include/dlaf/factorization/qr.h b/include/dlaf/factorization/qr.h index 0f0e0a9b6a..e4d17e19ec 100644 --- a/include/dlaf/factorization/qr.h +++ b/include/dlaf/factorization/qr.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include "dlaf/factorization/qr/api.h" #include "dlaf/matrix/index.h" #include "dlaf/matrix/tile.h" diff --git a/include/dlaf/factorization/qr/api.h b/include/dlaf/factorization/qr/api.h index 46ea0a7e82..d089f1ff3e 100644 --- a/include/dlaf/factorization/qr/api.h +++ b/include/dlaf/factorization/qr/api.h @@ -85,7 +85,7 @@ struct QR_Tfactor { common::Pipeline& mpi_col_task_chain); }; -/// ---- ETI +// ETI #define DLAF_FACTORIZATION_QR_TFACTOR_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct QR_Tfactor; diff --git a/include/dlaf/gpu/blas/api.h b/include/dlaf/gpu/blas/api.h index 95ba808692..b0a09b2d5d 100644 --- a/include/dlaf/gpu/blas/api.h +++ b/include/dlaf/gpu/blas/api.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #ifdef DLAF_WITH_HIP #include diff --git a/include/dlaf/gpu/blas/error.h b/include/dlaf/gpu/blas/error.h index b087ae64dc..891d63a667 100644 --- a/include/dlaf/gpu/blas/error.h +++ b/include/dlaf/gpu/blas/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include diff --git a/include/dlaf/gpu/cublas/error.h b/include/dlaf/gpu/cublas/error.h index 17caee86d8..4b78a173ac 100644 --- a/include/dlaf/gpu/cublas/error.h +++ b/include/dlaf/gpu/cublas/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include diff --git a/include/dlaf/gpu/cusolver/error.h b/include/dlaf/gpu/cusolver/error.h index 1482a38074..ec70d58842 100644 --- a/include/dlaf/gpu/cusolver/error.h +++ b/include/dlaf/gpu/cusolver/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include "dlaf/gpu/lapack/api.h" diff --git a/include/dlaf/gpu/lapack/api.h b/include/dlaf/gpu/lapack/api.h index 755c228b37..6d24dfc167 100644 --- a/include/dlaf/gpu/lapack/api.h +++ b/include/dlaf/gpu/lapack/api.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #ifdef DLAF_WITH_HIP #include diff --git a/include/dlaf/gpu/lapack/assert_info.h b/include/dlaf/gpu/lapack/assert_info.h index 560cddac92..af9a3357ed 100644 --- a/include/dlaf/gpu/lapack/assert_info.h +++ b/include/dlaf/gpu/lapack/assert_info.h @@ -14,8 +14,6 @@ #include -/// @file - #define DLAF_DECLARE_CUSOLVER_ASSERT_INFO(func) void assertInfo##func(whip::stream_t stream, int* info) namespace dlaf::gpulapack::internal { diff --git a/include/dlaf/gpu/lapack/error.h b/include/dlaf/gpu/lapack/error.h index 8595cde3a4..9c31a6d160 100644 --- a/include/dlaf/gpu/lapack/error.h +++ b/include/dlaf/gpu/lapack/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include diff --git a/include/dlaf/gpu/rocblas/error.h b/include/dlaf/gpu/rocblas/error.h index f92107d990..407d4d5c7a 100644 --- a/include/dlaf/gpu/rocblas/error.h +++ b/include/dlaf/gpu/rocblas/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include diff --git a/include/dlaf/gpu/rocsolver/error.h b/include/dlaf/gpu/rocsolver/error.h index eac12ba7ef..8c34a32d2c 100644 --- a/include/dlaf/gpu/rocsolver/error.h +++ b/include/dlaf/gpu/rocsolver/error.h @@ -10,8 +10,6 @@ #pragma once -/// @file - #include #include "dlaf/gpu/lapack/api.h" diff --git a/include/dlaf/init.h b/include/dlaf/init.h index e176be61a9..f3bd09f712 100644 --- a/include/dlaf/init.h +++ b/include/dlaf/init.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include diff --git a/include/dlaf/matrix/copy.h b/include/dlaf/matrix/copy.h index 24bb0d9951..2a4a5c9724 100644 --- a/include/dlaf/matrix/copy.h +++ b/include/dlaf/matrix/copy.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/common/range2d.h" diff --git a/include/dlaf/matrix/copy_tile.h b/include/dlaf/matrix/copy_tile.h index f910e889e7..adaa61d5e2 100644 --- a/include/dlaf/matrix/copy_tile.h +++ b/include/dlaf/matrix/copy_tile.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #if DLAF_WITH_GPU diff --git a/include/dlaf/matrix/distribution.h b/include/dlaf/matrix/distribution.h index 3dfd7bfc75..5baedced53 100644 --- a/include/dlaf/matrix/distribution.h +++ b/include/dlaf/matrix/distribution.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/communication/communicator_grid.h" #include "dlaf/matrix/index.h" #include "dlaf/matrix/util_distribution.h" diff --git a/include/dlaf/matrix/index.h b/include/dlaf/matrix/index.h index c94a032531..0f9b4a7a43 100644 --- a/include/dlaf/matrix/index.h +++ b/include/dlaf/matrix/index.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/common/index2d.h" #include "dlaf/types.h" diff --git a/include/dlaf/matrix/layout_info.h b/include/dlaf/matrix/layout_info.h index d9fb95c4ee..0a8d7029da 100644 --- a/include/dlaf/matrix/layout_info.h +++ b/include/dlaf/matrix/layout_info.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include "dlaf/common/assert.h" #include "dlaf/matrix/distribution.h" #include "dlaf/matrix/index.h" diff --git a/include/dlaf/matrix/matrix.h b/include/dlaf/matrix/matrix.h index 0e48a87022..8704feea74 100644 --- a/include/dlaf/matrix/matrix.h +++ b/include/dlaf/matrix/matrix.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include @@ -375,7 +377,7 @@ auto select(MatrixLike& matrix, common::IterableRange2D return internal::selectGeneric([&](auto index) { return matrix.readwrite(index); }, range); } -/// ---- ETI +// ETI #define DLAF_MATRIX_ETI(KWORD, DATATYPE, DEVICE) \ KWORD template class Matrix; \ diff --git a/include/dlaf/matrix/matrix_base.h b/include/dlaf/matrix/matrix_base.h index 664711b010..2d5638e0e1 100644 --- a/include/dlaf/matrix/matrix_base.h +++ b/include/dlaf/matrix/matrix_base.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include diff --git a/include/dlaf/matrix/matrix_mirror.h b/include/dlaf/matrix/matrix_mirror.h index 65c2963b36..541fbb524c 100644 --- a/include/dlaf/matrix/matrix_mirror.h +++ b/include/dlaf/matrix/matrix_mirror.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include @@ -170,7 +172,7 @@ class MatrixMirror : public MatrixMirror { } }; -/// ---- ETI +// ETI #define DLAF_MATRIX_MIRROR_ETI(KWORD, DATATYPE, TARGETDEVICE, SOURCEDEVICE) \ KWORD template class MatrixMirror; \ diff --git a/include/dlaf/matrix/panel.h b/include/dlaf/matrix/panel.h index ba3f323f49..0af7091eeb 100644 --- a/include/dlaf/matrix/panel.h +++ b/include/dlaf/matrix/panel.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/common/index2d.h" diff --git a/include/dlaf/matrix/print_csv.h b/include/dlaf/matrix/print_csv.h index dbf6d99f35..532773bb3c 100644 --- a/include/dlaf/matrix/print_csv.h +++ b/include/dlaf/matrix/print_csv.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include "dlaf/common/index2d.h" #include "dlaf/common/range2d.h" #include "dlaf/matrix/matrix.h" diff --git a/include/dlaf/matrix/print_gpu.h b/include/dlaf/matrix/print_gpu.h index a9245bbb3b..070074c68c 100644 --- a/include/dlaf/matrix/print_gpu.h +++ b/include/dlaf/matrix/print_gpu.h @@ -11,6 +11,8 @@ #ifdef DLAF_WITH_GPU #pragma once +/// @file + #include #include diff --git a/include/dlaf/matrix/print_numpy.h b/include/dlaf/matrix/print_numpy.h index 0acde1611e..dd72bd49d7 100644 --- a/include/dlaf/matrix/print_numpy.h +++ b/include/dlaf/matrix/print_numpy.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/common/index2d.h" diff --git a/include/dlaf/matrix/retiled_matrix.h b/include/dlaf/matrix/retiled_matrix.h index e550b9bd5f..454b1ecbc3 100644 --- a/include/dlaf/matrix/retiled_matrix.h +++ b/include/dlaf/matrix/retiled_matrix.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include @@ -161,7 +163,7 @@ class RetiledMatrix : public internal::MatrixBase { template class RetiledMatrix; -/// ---- ETI +// ETI #define DLAF_RETILED_MATRIX_ETI(KWORD, DATATYPE, DEVICE) \ KWORD template class RetiledMatrix; diff --git a/include/dlaf/matrix/tile.h b/include/dlaf/matrix/tile.h index ef31e0e2ff..dbba5eb434 100644 --- a/include/dlaf/matrix/tile.h +++ b/include/dlaf/matrix/tile.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include #include @@ -612,7 +614,7 @@ std::vector> splitTileDisjoint(ReadWriteTileSender; \ diff --git a/include/dlaf/matrix/views.h b/include/dlaf/matrix/views.h index fd72f7135c..3101601a0a 100644 --- a/include/dlaf/matrix/views.h +++ b/include/dlaf/matrix/views.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/common/assert.h" #include "dlaf/common/index2d.h" diff --git a/include/dlaf/memory/memory_chunk.h b/include/dlaf/memory/memory_chunk.h index 8b48ee7dde..40905273c4 100644 --- a/include/dlaf/memory/memory_chunk.h +++ b/include/dlaf/memory/memory_chunk.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include diff --git a/include/dlaf/memory/memory_view.h b/include/dlaf/memory/memory_view.h index 756d2af676..89068d1b86 100644 --- a/include/dlaf/memory/memory_view.h +++ b/include/dlaf/memory/memory_view.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include @@ -152,7 +154,7 @@ class MemoryView { SizeType size_; }; -/// ---- ETI +// ETI #define DLAF_MEMVIEW_ETI(KWORD, DATATYPE, DEVICE) KWORD template class MemoryView; diff --git a/include/dlaf/multiplication/general.h b/include/dlaf/multiplication/general.h index 8a0fc58e4b..ccce201482 100644 --- a/include/dlaf/multiplication/general.h +++ b/include/dlaf/multiplication/general.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/common/assert.h" diff --git a/include/dlaf/multiplication/general/api.h b/include/dlaf/multiplication/general/api.h index d1f8372dd5..16b59d7ea5 100644 --- a/include/dlaf/multiplication/general/api.h +++ b/include/dlaf/multiplication/general/api.h @@ -30,7 +30,7 @@ struct GeneralSub { Matrix& mat_b, const T beta, Matrix& mat_c); }; -/// ---- ETI +// ETI #define DLAF_MULTIPLICATION_GENERAL_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct GeneralSub; diff --git a/include/dlaf/multiplication/hermitian.h b/include/dlaf/multiplication/hermitian.h index 745f8f918f..1c87e40de5 100644 --- a/include/dlaf/multiplication/hermitian.h +++ b/include/dlaf/multiplication/hermitian.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" #include "dlaf/matrix/matrix.h" diff --git a/include/dlaf/multiplication/hermitian/api.h b/include/dlaf/multiplication/hermitian/api.h index 7aa900a122..f20583ce4b 100644 --- a/include/dlaf/multiplication/hermitian/api.h +++ b/include/dlaf/multiplication/hermitian/api.h @@ -22,7 +22,7 @@ struct Hermitian { Matrix& mat_b, const T beta, Matrix& mat_c); }; -/// ---- ETI +// ETI #define DLAF_MULTIPLICATION_HERMITIAN_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Hermitian; diff --git a/include/dlaf/multiplication/triangular.h b/include/dlaf/multiplication/triangular.h index c9cfd5577c..de30f334a0 100644 --- a/include/dlaf/multiplication/triangular.h +++ b/include/dlaf/multiplication/triangular.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" #include "dlaf/matrix/matrix.h" diff --git a/include/dlaf/multiplication/triangular/api.h b/include/dlaf/multiplication/triangular/api.h index 9a46800860..528840ef25 100644 --- a/include/dlaf/multiplication/triangular/api.h +++ b/include/dlaf/multiplication/triangular/api.h @@ -45,7 +45,7 @@ struct Triangular { Matrix& mat_a, Matrix& mat_b); }; -/// ---- ETI +// ETI #define DLAF_MULTIPLICATION_TRIANGULAR_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Triangular; diff --git a/include/dlaf/permutations/general.h b/include/dlaf/permutations/general.h index 9c8cbb4bcf..a27e6b1adb 100644 --- a/include/dlaf/permutations/general.h +++ b/include/dlaf/permutations/general.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include "dlaf/common/assert.h" diff --git a/include/dlaf/permutations/general/api.h b/include/dlaf/permutations/general/api.h index 3a75a29e89..3ac178fb5f 100644 --- a/include/dlaf/permutations/general/api.h +++ b/include/dlaf/permutations/general/api.h @@ -27,7 +27,7 @@ struct Permutations { Matrix& mat_out); }; -/// ---- ETI +// ETI #define DLAF_PERMUTATIONS_GENERAL_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Permutations; \ KWORD template struct Permutations; diff --git a/include/dlaf/schedulers.h b/include/dlaf/schedulers.h index 204c7d2e00..b231940942 100644 --- a/include/dlaf/schedulers.h +++ b/include/dlaf/schedulers.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include #include diff --git a/include/dlaf/solver.h b/include/dlaf/solver.h index 60cc2a7c37..93cbfd22e1 100644 --- a/include/dlaf/solver.h +++ b/include/dlaf/solver.h @@ -9,4 +9,6 @@ // #pragma once +/// @file + #include "dlaf/solver/triangular.h" diff --git a/include/dlaf/solver/triangular.h b/include/dlaf/solver/triangular.h index 25ccc67a24..60ad73a05f 100644 --- a/include/dlaf/solver/triangular.h +++ b/include/dlaf/solver/triangular.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include "dlaf/communication/communicator_grid.h" #include "dlaf/matrix/matrix.h" diff --git a/include/dlaf/solver/triangular/api.h b/include/dlaf/solver/triangular/api.h index 10a2ebc690..10e40d36a0 100644 --- a/include/dlaf/solver/triangular/api.h +++ b/include/dlaf/solver/triangular/api.h @@ -53,7 +53,7 @@ struct Triangular { Matrix& mat_a, Matrix& mat_b); }; -/// ---- ETI +// ETI #define DLAF_SOLVER_TRIANGULAR_ETI(KWORD, BACKEND, DEVICE, DATATYPE) \ KWORD template struct Triangular; diff --git a/include/dlaf/tune.h b/include/dlaf/tune.h index 4aa83a509a..54531596c6 100644 --- a/include/dlaf/tune.h +++ b/include/dlaf/tune.h @@ -9,6 +9,8 @@ // #pragma once +/// @file + #include #include diff --git a/include/dlaf/util_blas.h b/include/dlaf/util_blas.h index adac196856..6d1fb3b5d1 100644 --- a/include/dlaf/util_blas.h +++ b/include/dlaf/util_blas.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include diff --git a/include/dlaf/util_lapack.h b/include/dlaf/util_lapack.h index 08b3b443ae..1beea431a3 100644 --- a/include/dlaf/util_lapack.h +++ b/include/dlaf/util_lapack.h @@ -10,6 +10,8 @@ #pragma once +/// @file + #include #include #include "dlaf/common/assert.h" diff --git a/include/dlaf/version.h.in b/include/dlaf/version.h.in index b849812295..04383cab09 100644 --- a/include/dlaf/version.h.in +++ b/include/dlaf/version.h.in @@ -10,6 +10,8 @@ #pragma once +/// @file + #include namespace dlaf { diff --git a/miniapp/miniapp_eigensolver.cpp b/miniapp/miniapp_eigensolver.cpp index 2d5168e34e..43d4238fe9 100644 --- a/miniapp/miniapp_eigensolver.cpp +++ b/miniapp/miniapp_eigensolver.cpp @@ -24,7 +24,7 @@ #include "dlaf/communication/communicator_grid.h" #include "dlaf/communication/init.h" #include "dlaf/eigensolver/eigensolver.h" -#include "dlaf/eigensolver/get_band_size.h" +#include "dlaf/eigensolver/internal/get_band_size.h" #include "dlaf/init.h" #include "dlaf/matrix/copy.h" #include "dlaf/matrix/matrix.h" diff --git a/miniapp/miniapp_gen_eigensolver.cpp b/miniapp/miniapp_gen_eigensolver.cpp index a7c328a246..c83e564925 100644 --- a/miniapp/miniapp_gen_eigensolver.cpp +++ b/miniapp/miniapp_gen_eigensolver.cpp @@ -24,7 +24,7 @@ #include "dlaf/communication/communicator_grid.h" #include "dlaf/communication/init.h" #include "dlaf/eigensolver/gen_eigensolver.h" -#include "dlaf/eigensolver/get_band_size.h" +#include "dlaf/eigensolver/internal/get_band_size.h" #include "dlaf/init.h" #include "dlaf/matrix/copy.h" #include "dlaf/matrix/matrix.h" diff --git a/src/communication/communicator_impl.h b/src/communication/communicator_impl.h index 3e10028a01..5834141861 100644 --- a/src/communication/communicator_impl.h +++ b/src/communication/communicator_impl.h @@ -12,8 +12,6 @@ #include "dlaf/communication/error.h" -/// @file - namespace dlaf { namespace comm {