Skip to content

Commit

Permalink
Check that inputs to algorithms are not retiled
Browse files Browse the repository at this point in the history
  • Loading branch information
msimberg committed Jul 13, 2023
1 parent 3890a18 commit 0f5a091
Show file tree
Hide file tree
Showing 16 changed files with 117 additions and 8 deletions.
3 changes: 3 additions & 0 deletions include/dlaf/auxiliary/norm.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,17 @@ namespace dlaf::auxiliary {
///
/// @pre `A.blockSize().rows() == A.blockSize().cols()`,
/// @pre @p A is distributed according to @p grid,
/// @pre @p A has equal tile and block sizes,
/// @return the norm @p norm_type of the Matrix @p A or 0 if `A.size().isEmpty()` (see LAPACK doc for
/// additional info).
template <Backend backend, Device device, class T>
dlaf::BaseType<T> norm(comm::CommunicatorGrid grid, comm::Index2D rank, lapack::Norm norm_type,
blas::Uplo uplo, Matrix<const T, device>& A) {
using dlaf::matrix::equal_process_grid;
using dlaf::matrix::retiled;

DLAF_ASSERT(equal_process_grid(A, grid), A, grid);
DLAF_ASSERT(!retiled(A), A);

// LAPACK documentation specifies that if any dimension is 0, the result is 0
if (A.size().isEmpty())
Expand Down
8 changes: 6 additions & 2 deletions include/dlaf/eigensolver/band_to_tridiag.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ namespace eigensolver {
/// @pre mat_a has a square size,
/// @pre mat_a has a square block size,
/// @pre band_size is a divisor of mat_a.blockSize().cols(), and band_size >= 2
/// @pre mat_a is not distributed.
/// @pre mat_a is not distributed,
/// @pre mat_a has equal tile and block sizes.
template <Backend B, Device D, class T>
TridiagResult<T, Device::CPU> bandToTridiag(blas::Uplo uplo, SizeType band_size,
Matrix<const T, D>& mat_a) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(mat_a.blockSize().rows() % band_size == 0, mat_a.blockSize().rows(), band_size);
DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(band_size >= 2, band_size);

switch (uplo) {
Expand Down Expand Up @@ -140,13 +142,15 @@ TridiagResult<T, Device::CPU> bandToTridiag(blas::Uplo uplo, SizeType band_size,
/// @pre mat_a has a square size,
/// @pre mat_a has a square block size,
/// @pre band_size is a divisor of mat_a.blockSize().cols() and band_size >= 2,
/// @pre mat_a is distributed according to grid.
/// @pre mat_a is distributed according to grid,
/// @pre mat_a has equal tile and block sizes.
template <Backend backend, Device device, class T>
TridiagResult<T, Device::CPU> bandToTridiag(comm::CommunicatorGrid grid, blas::Uplo uplo,
SizeType band_size, Matrix<const T, device>& mat_a) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(band_size >= 2, band_size);

// If the grid contains only one rank force local implementation.
Expand Down
5 changes: 5 additions & 0 deletions include/dlaf/eigensolver/bt_band_to_tridiag.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ namespace dlaf::eigensolver {
// @pre band_size is a divisor of mat_hh.blockSize().cols()
// @pre mat_e is not distributed
// @pre mat_hh is not distributed
// @pre mat_e has equal tile and block sizes
// @pre mat_hh has equal tile and block sizes
template <Backend B, Device D, class T>
void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix<T, D>& mat_e,
matrix::Matrix<const T, Device::CPU>& mat_hh) {
Expand All @@ -63,6 +65,9 @@ void backTransformationBandToTridiag(const SizeType band_size, matrix::Matrix<T,
DLAF_ASSERT(mat_hh.size().rows() == mat_e.size().rows(), mat_hh, mat_e);
DLAF_ASSERT(mat_hh.blockSize().rows() == mat_e.blockSize().rows(), mat_hh, mat_e);

DLAF_ASSERT(!matrix::retiled(mat_e), mat_e);
DLAF_ASSERT(!matrix::retiled(mat_hh), mat_hh);

DLAF_ASSERT(band_size >= 2, band_size);
DLAF_ASSERT(mat_hh.blockSize().rows() % band_size == 0, mat_hh.blockSize(), band_size);

Expand Down
12 changes: 10 additions & 2 deletions include/dlaf/eigensolver/bt_reduction_to_band.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ namespace eigensolver {
/// @param mat_taus is the tau vector as returned by reductionToBand. The j-th element is the scaling
/// factor for the j-th HH transformation.
/// @pre mat_c is not distributed,
/// @pre mat_v is not distributed.
/// @pre mat_v is not distributed,
/// @pre mat_c has equal tile and block sizes,
/// @pre mat_v has equal tile and block sizes.
template <Backend backend, Device device, class T>
void backTransformationReductionToBand(const SizeType b, Matrix<T, device>& mat_c,
Matrix<const T, device>& mat_v,
Expand All @@ -45,6 +47,8 @@ void backTransformationReductionToBand(const SizeType b, Matrix<T, device>& mat_
DLAF_ASSERT(square_blocksize(mat_v), mat_v);
DLAF_ASSERT(mat_c.size().rows() == mat_v.size().rows(), mat_c, mat_v);
DLAF_ASSERT(mat_c.blockSize().rows() == mat_v.blockSize().rows(), mat_c, mat_v);
DLAF_ASSERT(!retiled(mat_c), mat_c);
DLAF_ASSERT(!retiled(mat_v), mat_v);

[[maybe_unused]] auto nr_reflectors_blocks = [&b, &mat_v]() {
const SizeType m = mat_v.size().rows();
Expand All @@ -68,7 +72,9 @@ void backTransformationReductionToBand(const SizeType b, Matrix<T, device>& mat_
/// @param mat_taus is the tau vector as returned by reductionToBand. The j-th element is the scaling
/// factor for the j-th HH transformation.
/// @pre mat_c is distributed,
/// @pre mat_v is distributed according to grid.
/// @pre mat_v is distributed according to grid,
/// @pre mat_c has equal tile and block sizes,
/// @pre mat_v has equal tile and block sizes.
template <Backend backend, Device device, class T>
void backTransformationReductionToBand(comm::CommunicatorGrid grid, const SizeType b,
Matrix<T, device>& mat_c, Matrix<const T, device>& mat_v,
Expand All @@ -79,6 +85,8 @@ void backTransformationReductionToBand(comm::CommunicatorGrid grid, const SizeTy
DLAF_ASSERT(square_blocksize(mat_v), mat_v);
DLAF_ASSERT(mat_c.size().rows() == mat_v.size().rows(), mat_c, mat_v);
DLAF_ASSERT(mat_c.blockSize().rows() == mat_v.blockSize().rows(), mat_c, mat_v);
DLAF_ASSERT(!retiled(mat_c), mat_c);
DLAF_ASSERT(!retiled(mat_v), mat_v);

[[maybe_unused]] auto nr_reflectors_blocks = [&b, &mat_v]() {
const SizeType m = mat_v.size().rows();
Expand Down
6 changes: 6 additions & 0 deletions include/dlaf/eigensolver/eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ void eigensolver(blas::Uplo uplo, Matrix<T, D>& mat, Matrix<BaseType<T>, D>& eig
DLAF_ASSERT(square_blocksize(eigenvectors), eigenvectors);
DLAF_ASSERT(eigenvectors.size() == mat.size(), eigenvectors, mat);
DLAF_ASSERT(eigenvectors.blockSize() == mat.blockSize(), eigenvectors, mat);
DLAF_ASSERT(!retiled(mat), mat);
DLAF_ASSERT(!retiled(eigenvalues), eigenvalues);
DLAF_ASSERT(!retiled(eigenvectors), eigenvectors);

internal::Eigensolver<B, D, T>::call(uplo, mat, eigenvalues, eigenvectors);
}
Expand Down Expand Up @@ -107,6 +110,9 @@ void eigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>& mat
DLAF_ASSERT(square_blocksize(eigenvectors), eigenvectors);
DLAF_ASSERT(eigenvectors.size() == mat.size(), eigenvectors, mat);
DLAF_ASSERT(eigenvectors.blockSize() == mat.blockSize(), eigenvectors, mat);
DLAF_ASSERT(!retiled(mat), mat);
DLAF_ASSERT(!retiled(eigenvalues), eigenvalues);
DLAF_ASSERT(!retiled(eigenvectors), eigenvectors);

internal::Eigensolver<B, D, T>::call(grid, uplo, mat, eigenvalues, eigenvectors);
}
Expand Down
8 changes: 8 additions & 0 deletions include/dlaf/eigensolver/gen_eigensolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ void genEigensolver(blas::Uplo uplo, Matrix<T, D>& mat_a, Matrix<T, D>& mat_b,
eigenvectors);
DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a);
DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(!matrix::retiled(eigenvalues), eigenvalues);
DLAF_ASSERT(!matrix::retiled(eigenvectors), eigenvectors);

internal::GenEigensolver<B, D, T>::call(uplo, mat_a, mat_b, eigenvalues, eigenvectors);
}
Expand Down Expand Up @@ -139,6 +143,10 @@ void genEigensolver(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, D>&
eigenvectors);
DLAF_ASSERT(eigenvectors.size() == mat_a.size(), eigenvectors, mat_a);
DLAF_ASSERT(eigenvectors.blockSize() == mat_a.blockSize(), eigenvectors, mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(!matrix::retiled(eigenvalues), eigenvalues);
DLAF_ASSERT(!matrix::retiled(eigenvectors), eigenvectors);

internal::GenEigensolver<B, D, T>::call(grid, uplo, mat_a, mat_b, eigenvalues, eigenvectors);
}
Expand Down
6 changes: 6 additions & 0 deletions include/dlaf/eigensolver/gen_to_std.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ namespace eigensolver {
/// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation.
/// @pre mat_a and mat_b have the same square size,
/// @pre mat_a and mat_b have the same square block size,
/// @pre mat_a and mat_b have equal tile and block sizes,
/// @pre mat_a and mat_b are not distributed.
template <Backend backend, Device device, class T>
void genToStd(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_b) {
Expand All @@ -47,6 +48,8 @@ void genToStd(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_
DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b);
DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b);
DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);
DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b);

Expand Down Expand Up @@ -80,6 +83,7 @@ void genToStd(blas::Uplo uplo, Matrix<T, device>& mat_a, Matrix<T, device>& mat_
/// Note: B should be modifiable as the diagonal tiles might be temporarily modified during the calculation.
/// @pre mat_a and mat_b have the same square size,
/// @pre mat_a and mat_b have the same square block size,
/// @pre mat_a and mat_b have equal tile and block sizes,
/// @pre mat_a and mat_b are distributed according to the grid.
template <Backend backend, Device device, class T>
void genToStd(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, device>& mat_a,
Expand All @@ -90,6 +94,8 @@ void genToStd(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, device>& m
DLAF_ASSERT(matrix::square_blocksize(mat_b), mat_b);
DLAF_ASSERT(mat_a.size() == mat_b.size(), mat_a, mat_b);
DLAF_ASSERT(mat_a.blockSize() == mat_b.blockSize(), mat_a, mat_b);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid);
DLAF_ASSERT(matrix::equal_process_grid(mat_b, grid), mat_b, grid);

Expand Down
4 changes: 4 additions & 0 deletions include/dlaf/eigensolver/reduction_to_band.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ namespace dlaf::eigensolver {
///
/// @pre mat_a has a square size
/// @pre mat_a has a square block size
/// @pre mat_a has equal tile and block sizes
/// @pre mat_a is a local matrix
/// @pre mat_a.blockSize().rows() % band_size == 0
template <Backend B, Device D, class T>
Matrix<T, Device::CPU> reductionToBand(Matrix<T, D>& mat_a, const SizeType band_size) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);

DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);

Expand Down Expand Up @@ -97,13 +99,15 @@ v v v v * *
///
/// @pre mat_a has a square size
/// @pre mat_a has a square block size
/// @pre mat_a has equal tile and block sizes
/// @pre mat_a is distributed according to @p grid
/// @pre mat_a.blockSize().rows() % band_size == 0
template <Backend B, Device D, class T>
Matrix<T, Device::CPU> reductionToBand(comm::CommunicatorGrid grid, Matrix<T, D>& mat_a,
const SizeType band_size) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid);

DLAF_ASSERT(band_size >= 2, band_size);
Expand Down
25 changes: 21 additions & 4 deletions include/dlaf/eigensolver/tridiag_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,18 @@ namespace eigensolver {
///
/// @pre tridiag and @p evals and @p evecs are local matrices
/// @pre tridiag has 2 columns and column block size of 2
/// @pre tridiag has equal tile and block sizes
/// @pre evecs is a square matrix with number of rows equal to the number of rows of @p tridiag and @p evals
/// @pre evecs has a square block size with number of block rows eqaul to the block rows of @p tridiag and @p evals
/// @pre evecs has a square block size with number of block rows equal to the block rows of @p tridiag and @p evals
/// @pre evals has equal tile and block sizes
/// @pre evecs has equal tile and block sizes
template <Backend backend, Device device, class T>
void tridiagSolver(Matrix<BaseType<T>, Device::CPU>& tridiag, Matrix<BaseType<T>, device>& evals,
Matrix<T, device>& evecs) {
DLAF_ASSERT(matrix::local_matrix(tridiag), tridiag);
DLAF_ASSERT(tridiag.distribution().size().cols() == 2, tridiag);
DLAF_ASSERT(tridiag.distribution().blockSize().cols() == 2, tridiag);
DLAF_ASSERT(!matrix::retiled(tridiag), tridiag);

DLAF_ASSERT(matrix::local_matrix(evals), evals);
DLAF_ASSERT(evals.distribution().size().cols() == 1, evals);
Expand All @@ -49,6 +53,9 @@ void tridiagSolver(Matrix<BaseType<T>, Device::CPU>& tridiag, Matrix<BaseType<T>
DLAF_ASSERT(matrix::square_size(evecs), evecs);
DLAF_ASSERT(matrix::square_blocksize(evecs), evecs);

DLAF_ASSERT(!matrix::retiled(evecs), evecs);
DLAF_ASSERT(!matrix::retiled(evals), evals);

DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evecs.distribution().blockSize().rows(),
evecs.distribution().blockSize().rows(), tridiag.distribution().blockSize().rows());
DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evals.distribution().blockSize().rows(),
Expand All @@ -70,19 +77,26 @@ void tridiagSolver(Matrix<BaseType<T>, Device::CPU>& tridiag, Matrix<BaseType<T>
/// of the second column is not used.
/// @param evals [out] (n x 1) local matrix holding the eigenvalues of the symmetric tridiagonal
/// matrix
/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the the symmetric tridiagonal
/// @param evecs [out] (n x n) distributed matrix holding the eigenvectors of the symmetric
/// tridiagonal matrix on exit.
///
/// @pre tridiag and @p evals are local matrices and are the same on all ranks
/// @pre tridiag has 2 columns and column block size of 2
/// @pre evecs is a square matrix with global number of rows equal to the number of rows of @p tridiag and @p evals
/// @pre evecs has a square block size with number of block rows eqaul to the block rows of @p tridiag and @p evals
/// @pre tridiag has equal tile and block sizes
/// @pre evecs is a square matrix with global number of rows equal to the number of rows of @p tridiag
/// and @p evals
/// @pre evecs has a square block size with number of block rows equal to the block rows of @p tridiag
/// and @p evals
/// @pre evals has equal tile and block sizes
/// @pre evecs has equal tile and block sizes
template <Backend B, Device D, class T>
void tridiagSolver(comm::CommunicatorGrid grid, Matrix<BaseType<T>, Device::CPU>& tridiag,
Matrix<BaseType<T>, D>& evals, Matrix<T, D>& evecs) {
DLAF_ASSERT(matrix::local_matrix(tridiag), tridiag);
DLAF_ASSERT(tridiag.distribution().size().cols() == 2, tridiag);
DLAF_ASSERT(tridiag.distribution().blockSize().cols() == 2, tridiag);
DLAF_ASSERT(!matrix::retiled(tridiag), tridiag);

DLAF_ASSERT(matrix::local_matrix(evals), evals);
DLAF_ASSERT(evals.distribution().size().cols() == 1, evals);
Expand All @@ -91,6 +105,9 @@ void tridiagSolver(comm::CommunicatorGrid grid, Matrix<BaseType<T>, Device::CPU>
DLAF_ASSERT(matrix::square_blocksize(evecs), evecs);
DLAF_ASSERT(matrix::equal_process_grid(evecs, grid), evecs, grid);

DLAF_ASSERT(!matrix::retiled(evecs), evecs);
DLAF_ASSERT(!matrix::retiled(evals), evals);

DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evecs.distribution().blockSize().rows(),
evecs, tridiag);
DLAF_ASSERT(tridiag.distribution().blockSize().rows() == evals.distribution().blockSize().rows(),
Expand Down
4 changes: 4 additions & 0 deletions include/dlaf/factorization/cholesky.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ namespace factorization {
/// which contain the upper or the lower triangular part (depending on the value of uplo),
/// @pre mat_a has a square size,
/// @pre mat_a has a square block size,
/// @pre mat_a has equal tile and block sizes,
/// @pre mat_a is not distributed.
template <Backend backend, Device device, class T>
void cholesky(blas::Uplo uplo, Matrix<T, device>& mat_a) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);

if (uplo == blas::Uplo::Lower)
Expand All @@ -60,11 +62,13 @@ void cholesky(blas::Uplo uplo, Matrix<T, device>& mat_a) {
/// which contain the upper or the lower triangular part (depending on the value of uplo),
/// @pre mat_a has a square size,
/// @pre mat_a has a square block size,
/// @pre mat_a has equal tile and block sizes,
/// @pre mat_a is distributed according to grid.
template <Backend backend, Device device, class T>
void cholesky(comm::CommunicatorGrid grid, blas::Uplo uplo, Matrix<T, device>& mat_a) {
DLAF_ASSERT(matrix::square_size(mat_a), mat_a);
DLAF_ASSERT(matrix::square_blocksize(mat_a), mat_a);
DLAF_ASSERT(!matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(matrix::equal_process_grid(mat_a, grid), mat_a, grid);

// Method only for Lower triangular matrix
Expand Down
10 changes: 10 additions & 0 deletions include/dlaf/multiplication/general.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ namespace dlaf::multiplication {
/// Only tiles whose both row and col tile coords are in the closed range [a,b] are accessed.
/// @pre mat_a, mat_b and mat_c have the same square block size,
/// @pre mat_a, mat_b and mat_c have the same size,
/// @pre mat_a, mat_b and mat_c have equal tile and block sizes,
/// @pre mat_a, mat_b and mat_c are not distributed,
/// @pre a <= b <= mat_a.nrTiles().rows()
template <Backend B, Device D, class T>
Expand All @@ -52,6 +53,10 @@ void generalSubMatrix(const SizeType a, const SizeType b, const blas::Op opA, co
DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_b), mat_b);
DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_c), mat_c);

DLAF_ASSERT(!dlaf::matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!dlaf::matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(!dlaf::matrix::retiled(mat_c), mat_c);

DLAF_ASSERT(matrix::local_matrix(mat_a), mat_a);
DLAF_ASSERT(matrix::local_matrix(mat_b), mat_b);
DLAF_ASSERT(matrix::local_matrix(mat_c), mat_c);
Expand Down Expand Up @@ -93,6 +98,7 @@ void generalSubMatrix(const SizeType a, const SizeType b, const blas::Op opA, co
/// @pre mat_a, mat_b and mat_c are distributed in the same way,
/// @pre mat_a, mat_b and mat_c have the same square block size,
/// @pre mat_a, mat_b and mat_c have the same size,
/// @pre mat_a, mat_b and mat_c have equal tile and block sizes,
/// @pre a <= b <= mat_a.nrTiles().rows()
template <Backend B, Device D, class T>
void generalSubMatrix([[maybe_unused]] comm::CommunicatorGrid grid,
Expand All @@ -108,6 +114,10 @@ void generalSubMatrix([[maybe_unused]] comm::CommunicatorGrid grid,
DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_b), mat_b);
DLAF_ASSERT(dlaf::matrix::square_blocksize(mat_c), mat_c);

DLAF_ASSERT(!dlaf::matrix::retiled(mat_a), mat_a);
DLAF_ASSERT(!dlaf::matrix::retiled(mat_b), mat_b);
DLAF_ASSERT(!dlaf::matrix::retiled(mat_c), mat_c);

// Note:
// This is an over-constraint, since the algorithm just cares about the sub-matrix size (and its
// distribution).
Expand Down
Loading

0 comments on commit 0f5a091

Please sign in to comment.