diff --git a/include/dlaf/auxiliary/norm.h b/include/dlaf/auxiliary/norm.h index 0748fcc7fe..75575426a5 100644 --- a/include/dlaf/auxiliary/norm.h +++ b/include/dlaf/auxiliary/norm.h @@ -37,14 +37,17 @@ namespace dlaf::auxiliary { /// /// @pre `A.blockSize().rows() == A.blockSize().cols()`, /// @pre @p A is distributed according to @p grid, +/// @pre @p A has equal tile and block sizes, /// @return the norm @p norm_type of the Matrix @p A or 0 if `A.size().isEmpty()` (see LAPACK doc for /// additional info). template dlaf::BaseType norm(comm::CommunicatorGrid grid, comm::Index2D rank, lapack::Norm norm_type, blas::Uplo uplo, Matrix& A) { using dlaf::matrix::equal_process_grid; + using dlaf::matrix::retiled; DLAF_ASSERT(equal_process_grid(A, grid), A, grid); + DLAF_ASSERT(!retiled(A), A); // LAPACK documentation specify that if any dimension is 0, the result is 0 if (A.size().isEmpty()) diff --git a/include/dlaf/eigensolver/band_to_tridiag.h b/include/dlaf/eigensolver/band_to_tridiag.h index c0bc65cecf..ea487c4265 100644 --- a/include/dlaf/eigensolver/band_to_tridiag.h +++ b/include/dlaf/eigensolver/band_to_tridiag.h @@ -69,7 +69,8 @@ namespace eigensolver { /// @pre mat_a has a square size, /// @pre mat_a has a square block size, /// @pre band_size is a divisor of mat_a.blockSize().cols(), and band_size >= 2 -/// @pre mat_a is not distributed. +/// @pre mat_a is not distributed, +/// @pre mat_a has equal tile and block sizes. 
template TridiagResult bandToTridiag(blas::Uplo uplo, SizeType band_size, Matrix& mat_a) { @@ -77,6 +78,7 @@ TridiagResult bandToTridiag(blas::Uplo uplo, SizeType band_size, DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); DLAF_ASSERT(mat_a.blockSize().rows() % band_size == 0, mat_a.blockSize().rows(), band_size); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(band_size >= 2, band_size); switch (uplo) { @@ -140,13 +142,15 @@ TridiagResult bandToTridiag(blas::Uplo uplo, SizeType band_size, /// @pre mat_a has a square size, /// @pre mat_a has a square block size, /// @pre band_size is a divisor of mat_a.blockSize().cols() and band_size >= 2, -/// @pre mat_a is distributed according to grid. +/// @pre mat_a is distributed according to grid, +/// @pre mat_a has equal tile and block sizes. template TridiagResult bandToTridiag(comm::CommunicatorGrid grid, blas::Uplo uplo, SizeType band_size, Matrix& mat_a) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(band_size >= 2, band_size); // If the grid contains only one rank force local implementation. 
diff --git a/include/dlaf/eigensolver/bt_band_to_tridiag.h b/include/dlaf/eigensolver/bt_band_to_tridiag.h index 438f1dffe7..e69bd369f6 100644 --- a/include/dlaf/eigensolver/bt_band_to_tridiag.h +++ b/include/dlaf/eigensolver/bt_band_to_tridiag.h @@ -51,6 +51,8 @@ namespace dlaf::eigensolver { // @pre band_size is a divisor of mat_hh.blockSize().cols() // @pre mat_e is not distributed // @pre mat_hh is not distributed +// @pre mat_e has equal tile and block sizes +// @pre mat_hh has equal tile and block sizes template void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix& mat_e, matrix::Matrix& mat_hh) { @@ -63,6 +65,9 @@ void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix= 2, band_size); DLAF_ASSERT(mat_hh.blockSize().rows() % band_size == 0, mat_hh.blockSize(), band_size); diff --git a/include/dlaf/eigensolver/bt_reduction_to_band.h b/include/dlaf/eigensolver/bt_reduction_to_band.h index b0e4426160..3710fa31e6 100644 --- a/include/dlaf/eigensolver/bt_reduction_to_band.h +++ b/include/dlaf/eigensolver/bt_reduction_to_band.h @@ -34,7 +34,9 @@ namespace eigensolver { /// @param mat_taus is the tau vector as returned by reductionToBand. The j-th element is the scaling /// factor for the j-th HH tranformation. /// @pre mat_c is not distributed, -/// @pre mat_v is not distributed. +/// @pre mat_v is not distributed, +/// @pre mat_c has equal tile and block sizes, +/// @pre mat_v has equal tile and block sizes. 
template void backTransformationReductionToBand(const SizeType b, Matrix& mat_c, Matrix& mat_v, @@ -45,6 +47,8 @@ void backTransformationReductionToBand(const SizeType b, Matrix& mat_ DLAF_ASSERT(square_blocksize(mat_v), mat_v); DLAF_ASSERT(mat_c.size().rows() == mat_v.size().rows(), mat_c, mat_v); DLAF_ASSERT(mat_c.blockSize().rows() == mat_v.blockSize().rows(), mat_c, mat_v); + DLAF_ASSERT(!retiled(mat_c), mat_c); + DLAF_ASSERT(!retiled(mat_v), mat_v); [[maybe_unused]] auto nr_reflectors_blocks = [&b, &mat_v]() { const SizeType m = mat_v.size().rows(); @@ -68,7 +72,9 @@ void backTransformationReductionToBand(const SizeType b, Matrix& mat_ /// @param mat_taus is the tau vector as returned by reductionToBand. The j-th element is the scaling /// factor for the j-th HH tranformation. /// @pre mat_c is distributed, -/// @pre mat_v is distributed according to grid. +/// @pre mat_v is distributed according to grid, +/// @pre mat_c has equal tile and block sizes, +/// @pre mat_v has equal tile and block sizes. 
template void backTransformationReductionToBand(comm::CommunicatorGrid grid, const SizeType b, Matrix& mat_c, Matrix& mat_v, @@ -79,6 +85,8 @@ void backTransformationReductionToBand(comm::CommunicatorGrid grid, const SizeTy DLAF_ASSERT(square_blocksize(mat_v), mat_v); DLAF_ASSERT(mat_c.size().rows() == mat_v.size().rows(), mat_c, mat_v); DLAF_ASSERT(mat_c.blockSize().rows() == mat_v.blockSize().rows(), mat_c, mat_v); + DLAF_ASSERT(!retiled(mat_c), mat_c); + DLAF_ASSERT(!retiled(mat_v), mat_v); [[maybe_unused]] auto nr_reflectors_blocks = [&b, &mat_v]() { const SizeType m = mat_v.size().rows(); diff --git a/include/dlaf/eigensolver/eigensolver.h b/include/dlaf/eigensolver/eigensolver.h index 80050f202e..471f61bf5d 100644 --- a/include/dlaf/eigensolver/eigensolver.h +++ b/include/dlaf/eigensolver/eigensolver.h @@ -50,6 +50,9 @@ void eigensolver(blas::Uplo uplo, Matrix& mat, Matrix, D>& eig DLAF_ASSERT(square_blocksize(eigenvectors), eigenvectors); DLAF_ASSERT(eigenvectors.size() == mat.size(), eigenvectors, mat); DLAF_ASSERT(eigenvectors.blockSize() == mat.blockSize(), eigenvectors, mat); + DLAF_ASSERT(!retiled(mat), mat); + DLAF_ASSERT(!retiled(eigenvalues), eigenvalues); + DLAF_ASSERT(!retiled(eigenvectors), eigenvectors); internal::Eigensolver::call(uplo, mat, eigenvalues, eigenvectors); } @@ -107,6 +110,9 @@ void eigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix& mat DLAF_ASSERT(square_blocksize(eigenvectors), eigenvectors); DLAF_ASSERT(eigenvectors.size() == mat.size(), eigenvectors, mat); DLAF_ASSERT(eigenvectors.blockSize() == mat.blockSize(), eigenvectors, mat); + DLAF_ASSERT(!retiled(mat), mat); + DLAF_ASSERT(!retiled(eigenvalues), eigenvalues); + DLAF_ASSERT(!retiled(eigenvectors), eigenvectors); internal::Eigensolver::call(grid, uplo, mat, eigenvalues, eigenvectors); } diff --git a/include/dlaf/eigensolver/gen_eigensolver.h b/include/dlaf/eigensolver/gen_eigensolver.h index 621b2a8161..6abd5c3a6d 100644 --- 
a/include/dlaf/eigensolver/gen_eigensolver.h +++ b/include/dlaf/eigensolver/gen_eigensolver.h @@ -58,6 +58,10 @@ void genEigensolver(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b, eigenvectors); DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!matrix::retiled(eigenvalues), eigenvalues); + DLAF_ASSERT(!matrix::retiled(eigenvectors), eigenvectors); internal::GenEigensolver::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors); } @@ -139,6 +143,10 @@ void genEigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix& eigenvectors); DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a); DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!matrix::retiled(eigenvalues), eigenvalues); + DLAF_ASSERT(!matrix::retiled(eigenvectors), eigenvectors); internal::GenEigensolver::call(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors); } diff --git a/include/dlaf/eigensolver/gen_to_std.h b/include/dlaf/eigensolver/gen_to_std.h index 9fa1ec7976..ea58753831 100644 --- a/include/dlaf/eigensolver/gen_to_std.h +++ b/include/dlaf/eigensolver/gen_to_std.h @@ -38,6 +38,7 @@ namespace eigensolver { /// Note: B should be modifiable as the diagonal tiles might be temporarly modified during the calculation. /// @pre mat_a and mat_b have the same square size, /// @pre mat_a and mat_b have the same square block size, +/// @pre mat_a and mat_b have equal tile and block sizes, /// @pre mat_a and mat_b are not distributed.
template void genToStd(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_b) { @@ -47,6 +48,8 @@ void genToStd(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_ DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); @@ -80,6 +83,7 @@ void genToStd(blas::Uplo uplo, Matrix& mat_a, Matrix& mat_ /// Note: B should be modifiable as the diagonal tiles might be temporarly modified during the calculation. /// @pre mat_a and mat_b have the same square size, /// @pre mat_a and mat_b have the same square block size, +/// @pre mat_a and mat_b have equal tile and block sizes, /// @pre mat_a and mat_b are distributed according to the grid. template void genToStd(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix& mat_a, @@ -90,6 +94,8 @@ void genToStd(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix& m DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b); DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b); DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); diff --git a/include/dlaf/eigensolver/reduction_to_band.h b/include/dlaf/eigensolver/reduction_to_band.h index 627bd98281..d1f5f7d670 100644 --- a/include/dlaf/eigensolver/reduction_to_band.h +++ b/include/dlaf/eigensolver/reduction_to_band.h @@ -32,12 +32,14 @@ namespace dlaf::eigensolver { /// /// @pre mat_a has a square size /// @pre mat_a has a square block size +/// @pre mat_a has equal tile and block sizes /// @pre mat_a is a local matrix /// @pre
mat_a.blockSize().rows() % band_size == 0 template Matrix reductionToBand(Matrix& mat_a, const SizeType band_size) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); @@ -97,6 +99,7 @@ v v v v * * /// /// @pre mat_a has a square size /// @pre mat_a has a square block size +/// @pre mat_a has equal tile and block sizes /// @pre mat_a is distributed according to @p grid /// @pre mat_a.blockSize().rows() % band_size == 0 template @@ -104,6 +107,7 @@ Matrix reductionToBand(comm::CommunicatorGrid grid, Matrix const SizeType band_size) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(band_size >= 2, band_size); diff --git a/include/dlaf/eigensolver/tridiag_solver.h b/include/dlaf/eigensolver/tridiag_solver.h index 7c224d33da..c7774d3557 100644 --- a/include/dlaf/eigensolver/tridiag_solver.h +++ b/include/dlaf/eigensolver/tridiag_solver.h @@ -33,14 +33,18 @@ namespace eigensolver { /// /// @pre tridiag and @p evals and @p evecs are local matrices /// @pre tridiag has 2 columns and column block size of 2 +/// @pre tridiag has equal tile and block sizes /// @pre evecs is a square matrix with number of rows equal to the number of rows of @p tridiag and @p evals -/// @pre evecs has a square block size with number of block rows eqaul to the block rows of @p tridiag and @p evals +/// @pre evecs has a square block size with number of block rows equal to the block rows of @p tridiag and @p evals +/// @pre evals has equal tile and block sizes +/// @pre evecs has equal tile and block sizes template void tridiagSolver(Matrix, Device::CPU>& tridiag, Matrix, device>& evals, Matrix& evecs) { DLAF_ASSERT(matrix::local_matrix(tridiag), tridiag); 
DLAF_ASSERT(tridiag.distribution().size().cols() == 2, tridiag); DLAF_ASSERT(tridiag.distribution().blockSize().cols() == 2, tridiag); + DLAF_ASSERT(!matrix::retiled(tridiag), tridiag); DLAF_ASSERT(matrix::local_matrix(evals), evals); DLAF_ASSERT(evals.distribution().size().cols() == 1, evals); @@ -49,6 +53,9 @@ void tridiagSolver(Matrix, Device::CPU>& tridiag, Matrix DLAF_ASSERT(matrix::square_size(evecs), evecs); DLAF_ASSERT(matrix::square_blocksize(evecs), evecs); + DLAF_ASSERT(!matrix::retiled(evecs), evecs); + DLAF_ASSERT(!matrix::retiled(evals), evals); + DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evecs.distribution().blockSize().rows(), evecs.distribution().blockSize().rows(), tridiag.distribution().blockSize().rows()); DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evals.distribution().blockSize().rows(), @@ -70,19 +77,26 @@ void tridiagSolver(Matrix, Device::CPU>& tridiag, Matrix /// of the second column is not used. /// @param evals [out] (n x 1) local matrix holding the eigenvalues of the the symmetric tridiagonal /// matrix -/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the the symmetric tridiagonal +/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the the symmetric +/// tridiagonal /// matrix on exit. 
/// /// @pre tridiag and @p evals are local matrices and are the same on all ranks /// @pre tridiag has 2 columns and column block size of 2 -/// @pre evecs is a square matrix with global number of rows equal to the number of rows of @p tridiag and @p evals -/// @pre evecs has a square block size with number of block rows eqaul to the block rows of @p tridiag and @p evals +/// @pre tridiag has equal tile and block sizes +/// @pre evecs is a square matrix with global number of rows equal to the number of rows of @p tridiag +/// and @p evals +/// @pre evecs has a square block size with number of block rows equal to the block rows of @p tridiag +/// and @p evals +/// @pre evals has equal tile and block sizes +/// @pre evecs has equal tile and block sizes template void tridiagSolver(comm::CommunicatorGrid grid, Matrix, Device::CPU>& tridiag, Matrix, D>& evals, Matrix& evecs) { DLAF_ASSERT(matrix::local_matrix(tridiag), tridiag); DLAF_ASSERT(tridiag.distribution().size().cols() == 2, tridiag); DLAF_ASSERT(tridiag.distribution().blockSize().cols() == 2, tridiag); + DLAF_ASSERT(!matrix::retiled(tridiag), tridiag); DLAF_ASSERT(matrix::local_matrix(evals), evals); DLAF_ASSERT(evals.distribution().size().cols() == 1, evals); @@ -91,6 +105,9 @@ void tridiagSolver(comm::CommunicatorGrid grid, Matrix, Device::CPU> DLAF_ASSERT(matrix::square_blocksize(evecs), evecs); DLAF_ASSERT(matrix::equal_process_grid(evecs, grid), evecs, grid); + DLAF_ASSERT(!matrix::retiled(evecs), evecs); + DLAF_ASSERT(!matrix::retiled(evals), evals); + DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evecs.distribution().blockSize().rows(), evecs, tridiag); DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evals.distribution().blockSize().rows(), diff --git a/include/dlaf/factorization/cholesky.h b/include/dlaf/factorization/cholesky.h index a52b1e4ddd..3c537c1fd3 100644 --- a/include/dlaf/factorization/cholesky.h +++ b/include/dlaf/factorization/cholesky.h @@ -34,11 +34,13 @@ namespace 
factorization { /// which contain the upper or the lower triangular part (depending on the value of uplo), /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a has equal tile and block sizes, /// @pre mat_a is not distributed. template void cholesky(blas::Uplo uplo, Matrix& mat_a) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); if (uplo == blas::Uplo::Lower) @@ -60,11 +62,13 @@ void cholesky(blas::Uplo uplo, Matrix& mat_a) { /// which contain the upper or the lower triangular part (depending on the value of uplo), /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a has equal tile and block sizes, /// @pre mat_a is distributed according to grid. template void cholesky(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix& mat_a) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); // Method only for Lower triangular matrix diff --git a/include/dlaf/multiplication/general.h b/include/dlaf/multiplication/general.h index f743dc21bc..6ef05ab17f 100644 --- a/include/dlaf/multiplication/general.h +++ b/include/dlaf/multiplication/general.h @@ -42,6 +42,7 @@ namespace dlaf::multiplication { /// Only tiles whose both row and col tile coords are in the closed range [a,b] are accessed.
/// @pre mat_a, mat_b and mat_c have the same square block size, /// @pre mat_a, mat_b and mat_c have the same size, +/// @pre mat_a, mat_b and mat_c have equal tile and block sizes, /// @pre mat_a, mat_b and mat_c are not distributed, /// @pre a <= b <= mat_a.nrTiles().rows() template @@ -52,6 +53,10 @@ void generalSubMatrix(const SizeType a, const SizeType b, const blas::Op opA, co DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_b), mat_b); DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_c), mat_c); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_c), mat_c); + DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(mat_c), mat_c); @@ -93,6 +98,7 @@ void generalSubMatrix(const SizeType a, const SizeType b, const blas::Op opA, co /// @pre mat_a, mat_b and mat_c are distributed in the same way, /// @pre mat_a, mat_b and mat_c have the same square block size, /// @pre mat_a, mat_b and mat_c have the same size, +/// @pre mat_a, mat_b and mat_c have equal tile and block sizes, /// @pre a <= b <= mat_a.nrTiles().rows() template void generalSubMatrix([[maybe_unused]] comm::CommunicatorGrid grid, @@ -108,6 +114,10 @@ void generalSubMatrix([[maybe_unused]] comm::CommunicatorGrid grid, DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_b), mat_b); DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_c), mat_c); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!dlaf::matrix::retiled(mat_c), mat_c); + // Note: // This is an over-constraint, since the algorithm just cares about the sub-matrix size (and its // distribution). 
diff --git a/include/dlaf/multiplication/hermitian.h b/include/dlaf/multiplication/hermitian.h index b1adc576e1..57038a41ea 100644 --- a/include/dlaf/multiplication/hermitian.h +++ b/include/dlaf/multiplication/hermitian.h @@ -35,6 +35,7 @@ namespace dlaf::multiplication { /// elements of the result. /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a mat_b and mat_c have equal tile and block sizes, /// @pre mat_a mat_b and mat_c are not distributed, /// @pre mat_a mat_b are multipliable and the result can be summed to mat_c. template @@ -42,6 +43,9 @@ void hermitian(blas::Side side, blas::Uplo uplo, const T alpha, Matrix& mat_b, const T beta, Matrix& mat_c) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!matrix::retiled(mat_c), mat_c); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(mat_c), mat_c); @@ -83,6 +87,7 @@ void hermitian(blas::Side side, blas::Uplo uplo, const T alpha, Matrix @@ -90,6 +95,9 @@ void hermitian(comm::CommunicatorGrid grid, blas::Side side, blas::Uplo uplo, co Matrix& mat_a, Matrix& mat_b, const T beta, Matrix& mat_c) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); + DLAF_ASSERT(!matrix::retiled(mat_c), mat_c); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_c, grid), mat_c, grid); diff --git a/include/dlaf/multiplication/triangular.h b/include/dlaf/multiplication/triangular.h index 530c767121..cb2c499a3a 100644 --- a/include/dlaf/multiplication/triangular.h +++
b/include/dlaf/multiplication/triangular.h @@ -37,6 +37,7 @@ namespace multiplication { /// elements of the result. /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a and mat_b have equal tile and block sizes, /// @pre mat_a and mat_b are not distributed, /// @pre mat_a and mat_b are multipliable. template @@ -44,6 +45,8 @@ void triangular(blas::Side side, blas::Uplo uplo, blas::Op op, blas::Diag diag, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); @@ -104,6 +107,7 @@ void triangular(blas::Side side, blas::Uplo uplo, blas::Op op, blas::Diag diag, /// elements of the result. /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a and mat_b have equal tile and block sizes, /// @pre mat_a and mat_b are distributed according to the grid, /// @pre mat_a and mat_b are multipliable. 
template @@ -111,6 +115,8 @@ void triangular(comm::CommunicatorGrid grid, blas::Side side, blas::Uplo uplo, b blas::Diag diag, T alpha, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); diff --git a/include/dlaf/permutations/general.h b/include/dlaf/permutations/general.h index d9fb79cf92..fcad386526 100644 --- a/include/dlaf/permutations/general.h +++ b/include/dlaf/permutations/general.h @@ -52,6 +52,10 @@ void permute(SizeType i_begin, SizeType i_end, Matrix& perms, DLAF_ASSERT(matrix::equal_size(mat_in, mat_out), mat_in); DLAF_ASSERT(matrix::equal_blocksize(mat_in, mat_out), mat_in); + DLAF_ASSERT(!matrix::retiled(perms), perms); + DLAF_ASSERT(!matrix::retiled(mat_in), mat_in); + DLAF_ASSERT(!matrix::retiled(mat_out), mat_out); + DLAF_ASSERT(perms.size().rows() == mat_in.size().rows(), perms, mat_in); DLAF_ASSERT(perms.size().cols() == 1, perms); DLAF_ASSERT(perms.blockSize().rows() == mat_in.blockSize().rows(), mat_in, perms); @@ -100,6 +104,10 @@ void permute(comm::CommunicatorGrid grid, common::Pipeline& DLAF_ASSERT(matrix::equal_size(mat_in, mat_out), mat_in); DLAF_ASSERT(matrix::equal_blocksize(mat_in, mat_out), mat_in); + DLAF_ASSERT(!matrix::retiled(perms), perms); + DLAF_ASSERT(!matrix::retiled(mat_in), mat_in); + DLAF_ASSERT(!matrix::retiled(mat_out), mat_out); + DLAF_ASSERT(perms.size().rows() == mat_in.size().rows(), perms, mat_in); DLAF_ASSERT(perms.size().cols() == 1, perms); DLAF_ASSERT(perms.blockSize().rows() == mat_in.blockSize().rows(), mat_in, perms); diff --git a/include/dlaf/solver/triangular.h b/include/dlaf/solver/triangular.h index d08508d698..75160682f9 100644 --- a/include/dlaf/solver/triangular.h +++ 
b/include/dlaf/solver/triangular.h @@ -37,6 +37,7 @@ namespace solver { /// elements of the matrix X, /// @pre mat_a has a square size, /// @pre mat_a has a square block size, +/// @pre mat_a and mat_b have equal tile and block sizes, /// @pre mat_a and mat_b are not distributed, /// @pre mat_a and mat_b are multipliable. template @@ -44,6 +45,8 @@ void triangular(blas::Side side, blas::Uplo uplo, blas::Op op, blas::Diag diag, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a); DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b); @@ -106,6 +109,7 @@ void triangular(blas::Side side, blas::Uplo uplo, blas::Op op, blas::Diag diag, /// the elements of the matrix X, /// @pre matrix A has a square size, /// @pre matrix A has a square block size, +/// @pre matrix A and matrix B have equal tile and block sizes, /// @pre matrix A and matrix B are distributed according to the grid, /// @pre matrix A and matrix B are multipliable. template @@ -113,6 +117,8 @@ void triangular(comm::CommunicatorGrid grid, blas::Side side, blas::Uplo uplo, b blas::Diag diag, T alpha, Matrix& mat_a, Matrix& mat_b) { DLAF_ASSERT(matrix::square_size(mat_a), mat_a); DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_a), mat_a); + DLAF_ASSERT(!matrix::retiled(mat_b), mat_b); DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid); DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid); diff --git a/include/dlaf/util_matrix.h b/include/dlaf/util_matrix.h index 613a8ed7a4..f3cb372158 100644 --- a/include/dlaf/util_matrix.h +++ b/include/dlaf/util_matrix.h @@ -49,6 +49,12 @@ bool square_blocksize(const MatrixLike& m) noexcept { return m.blockSize().rows() == m.blockSize().cols(); } +/// Returns true if the matrix is retiled, i.e.
if block and tile size are unequal. +template +bool retiled(const MatrixLike& m) noexcept { + return m.blockSize() != m.baseTileSize(); +} + /// Returns true if matrices have equal sizes. template bool equal_size(const MatrixLikeA& lhs, const MatrixLikeB& rhs) noexcept {