Skip to content

Commit

Permalink
Add MatrixRef to take a sub-matrix from an existing Matrix (#934)
Browse files Browse the repository at this point in the history
  • Loading branch information
msimberg authored Jul 28, 2023
1 parent 9b7f0e0 commit 7114b18
Show file tree
Hide file tree
Showing 8 changed files with 579 additions and 1 deletion.
59 changes: 58 additions & 1 deletion include/dlaf/matrix/distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,15 @@
namespace dlaf {
namespace matrix {

/// Contains information to create a sub-distribution.
///
/// A sub-distribution is described by where it starts inside the distribution it is taken from
/// (@c origin) and by how many elements it spans (@c size).
struct SubDistributionSpec {
/// Element index, relative to the input distribution, at which the sub-distribution starts.
GlobalElementIndex origin;
/// Size, in elements, of the sub-distribution.
GlobalElementSize size;
};

/// Distribution contains the information about the size and distribution of a matrix.
///
/// More details available in misc/matrix_distribution.md.

class Distribution {
public:
/// Constructs a distribution for a non distributed matrix of size {0, 0} and block size {1, 1}.
Expand Down Expand Up @@ -119,6 +124,15 @@ class Distribution {

Distribution& operator=(Distribution&& rhs) noexcept;

/// Constructs a sub-distribution based on the given distribution @p dist specified by @p spec.
///
/// The new distribution starts as a copy of @p dist; its size is then replaced by @p spec.size and
/// its element offset is shifted by @p spec.origin.
///
/// @param[in] dist is the input distribution (taken by value),
/// @param[in] spec contains the origin and size of the new distribution relative to the input distribution,
/// @pre spec.origin.isValid()
/// @pre spec.size.isValid()
/// @pre spec.origin + spec.size <= dist.size() (checked separately for rows and columns)
Distribution(Distribution dist, const SubDistributionSpec& spec);

bool operator==(const Distribution& rhs) const noexcept {
return size_ == rhs.size_ && local_size_ == rhs.local_size_ && tile_size_ == rhs.tile_size_ &&
block_size_ == rhs.block_size_ && global_nr_tiles_ == rhs.global_nr_tiles_ &&
Expand Down Expand Up @@ -490,6 +504,30 @@ class Distribution {
localElementDistanceFromLocalTile<Coord::Col>(begin.col(), end.col())};
}

/// Maps a tile index of a sub-distribution onto the matching tile index of the current distribution.
///
/// The sub-distribution is given by @p sub_distribution and starts at element @p sub_offset of the
/// current distribution; @p sub_index is a tile index inside that sub-distribution.
///
/// @pre sub_index.isIn(sub_distribution.nrTiles()),
/// @pre @p sub_distribution is a compatible sub-distribution of the current one at @p sub_offset.
GlobalTileIndex globalTileIndexFromSubDistribution(const GlobalElementIndex& sub_offset,
                                                   const Distribution& sub_distribution,
                                                   const GlobalTileIndex& sub_index) const noexcept {
  DLAF_ASSERT(sub_index.isIn(sub_distribution.nrTiles()), sub_index, sub_distribution.nrTiles());
  DLAF_ASSERT(isCompatibleSubDistribution(sub_offset, sub_distribution), "");

  // Tile of the current distribution holding the first element of the sub-distribution.
  const GlobalTileIndex first_parent_tile = globalTileIndex(sub_offset);
  // Distance, in tiles, between sub_index and the first tile of the sub-distribution.
  const auto tiles_from_first = common::sizeFromOrigin(sub_index);

  return first_parent_tile + tiles_from_first;
}

/// Computes where, inside a tile of the current distribution, the sub-distribution tile
/// @p sub_index begins.
///
/// The sub-distribution is given by @p sub_offset and @p sub_distribution. Along each dimension
/// only the first tile (index 0) of the sub-distribution can start inside a tile of the current
/// distribution; every other tile gets offset 0 in that dimension.
///
/// @pre sub_index.isIn(sub_distribution.nrTiles()),
/// @pre @p sub_distribution is a compatible sub-distribution of the current one at @p sub_offset.
TileElementIndex tileElementOffsetFromSubDistribution(
    const GlobalElementIndex& sub_offset, const Distribution& sub_distribution,
    const GlobalTileIndex& sub_index) const noexcept {
  DLAF_ASSERT(sub_index.isIn(sub_distribution.nrTiles()), sub_index, sub_distribution.nrTiles());
  DLAF_ASSERT(isCompatibleSubDistribution(sub_offset, sub_distribution), "");

  const bool in_first_tile_row = sub_index.row() == 0;
  const bool in_first_tile_col = sub_index.col() == 0;

  // The in-tile position of sub_offset only matters for the leading tile row/column.
  const auto row_in_tile =
      in_first_tile_row ? tileElementFromGlobalElement<Coord::Row>(sub_offset.row()) : 0;
  const auto col_in_tile =
      in_first_tile_col ? tileElementFromGlobalElement<Coord::Col>(sub_offset.col()) : 0;

  return {row_in_tile, col_in_tile};
}

private:
/// @pre block_size_, and tile_size_ are already set correctly.
template <Coord rc>
Expand Down Expand Up @@ -564,6 +602,25 @@ class Distribution {
/// @post offset_.row() < block_size_.rows() && offset_.col() < block_size_.cols()
void normalizeSourceRankAndOffset() noexcept;

/// Tells whether @p sub_distribution, placed at element @p sub_offset, is a compatible
/// sub-distribution of the current distribution.
///
/// Three conditions must hold:
/// - same block size, base tile size, rank index and communicator grid size,
/// - the source rank index of @p sub_distribution is the rank owning the tile of the current
///   distribution that contains @p sub_offset,
/// - @p sub_offset plus the size of @p sub_distribution does not exceed the size of the current
///   distribution, in both rows and columns.
bool isCompatibleSubDistribution(const GlobalElementIndex& sub_offset,
                                 const Distribution& sub_distribution) const noexcept {
  // Grid/tiling parameters have to match one by one (evaluation stops at the first mismatch).
  bool same_grid = blockSize() == sub_distribution.blockSize();
  same_grid = same_grid && baseTileSize() == sub_distribution.baseTileSize();
  same_grid = same_grid && rankIndex() == sub_distribution.rankIndex();
  same_grid = same_grid && commGridSize() == sub_distribution.commGridSize();

  // The sub-distribution must start on the rank that owns the tile containing sub_offset.
  const bool same_source_rank =
      rankGlobalTile(globalTileIndex(sub_offset)) == sub_distribution.sourceRankIndex();

  // The sub-distribution must not extend past the end of the current distribution.
  const bool rows_fit = sub_offset.row() + sub_distribution.size().rows() <= size().rows();
  const bool fits_inside =
      rows_fit && sub_offset.col() + sub_distribution.size().cols() <= size().cols();

  return same_grid && same_source_rank && fits_inside;
}

/// Sets default values.
///
/// offset_ = {0, 0}
Expand Down
194 changes: 194 additions & 0 deletions include/dlaf/matrix/matrix_ref.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
//
// Distributed Linear Algebra with Future (DLAF)
//
// Copyright (c) 2018-2023, ETH Zurich
// All rights reserved.
//
// Please, refer to the LICENSE file in the root directory.
// SPDX-License-Identifier: BSD-3-Clause
//

#pragma once

/// @file

#include <dlaf/matrix/distribution.h>
#include <dlaf/matrix/matrix.h>
#include <dlaf/matrix/matrix_base.h>
#include <dlaf/matrix/tile.h>
#include <dlaf/types.h>

namespace dlaf::matrix::internal {
/// Contains information to create a sub-matrix.
///
/// This is an alias of @c SubDistributionSpec, i.e. it holds the @c origin and the @c size of the
/// sub-matrix.
using SubMatrixSpec = SubDistributionSpec;

/// A @c MatrixRef represents a sub-matrix of a @c Matrix.
///
/// The class has reference semantics, meaning accesses to a @c MatrixRef and
/// its corresponding @c Matrix are interleaved if calls to read/readwrite are
/// interleaved. Access to a @c MatrixRef and its corresponding @c Matrix is not
/// thread-safe. The corresponding @c Matrix must outlive the @c MatrixRef, because
/// the @c MatrixRef only stores a reference to it.
template <class T, Device D>
class MatrixRef;

/// Read-only view of a sub-matrix of a @c Matrix<const T, D>.
///
/// The object stores a reference to the parent matrix and the element origin of the sub-matrix
/// inside it; tiles are obtained from the parent matrix and, where needed, narrowed to a sub-tile.
/// The parent matrix must stay alive for as long as this object is used.
template <class T, Device D>
class MatrixRef<const T, D> : public internal::MatrixBase {
public:
  static constexpr Device device = D;

  using ElementType = T;
  using TileType = Tile<ElementType, D>;
  using ConstTileType = Tile<const ElementType, D>;
  using TileDataType = internal::TileData<ElementType, D>;
  using ReadOnlySenderType = ReadOnlyTileSender<T, D>;

  /// Create a sub-matrix of @p mat specified by @p spec.
  ///
  /// @param[in] mat is the input matrix,
  /// @param[in] spec contains the origin and size of the new matrix relative to the input matrix,
  /// @pre spec.origin.isValid(),
  /// @pre spec.size.isValid(),
  /// @pre spec.origin + spec.size <= mat.size().
  MatrixRef(Matrix<const T, D>& mat, const SubMatrixSpec& spec)
      : internal::MatrixBase(Distribution(mat.distribution(), spec)), mat_const_(mat),
        origin_(spec.origin) {}

  // A MatrixRef is bound to one parent matrix for its whole lifetime: no default construction,
  // no copies, no moves.
  MatrixRef() = delete;
  MatrixRef(MatrixRef&&) = delete;
  MatrixRef(const MatrixRef&) = delete;
  MatrixRef& operator=(MatrixRef&&) = delete;
  MatrixRef& operator=(const MatrixRef&) = delete;

  /// Returns a read-only sender of the Tile with local index @p index.
  ///
  /// Note that this overload is declared noexcept while the GlobalTileIndex overload it forwards
  /// to is not: an exception escaping from there terminates the program.
  ///
  /// @pre index.isIn(distribution().localNrTiles()).
  ReadOnlySenderType read(const LocalTileIndex& index) noexcept {
    // Note: this forwards to the overload with GlobalTileIndex which will
    // handle taking a subtile if needed
    return read(distribution().globalTileIndex(index));
  }

  /// Returns a read-only sender of the Tile with global index @p index.
  ///
  /// @pre the global tile is stored in the current process,
  /// @pre index.isIn(distribution().nrTiles()).
  ReadOnlySenderType read(const GlobalTileIndex& index) {
    DLAF_ASSERT(index.isIn(distribution().nrTiles()), index, distribution().nrTiles());

    // Bind the parent distribution by reference: it is only inspected here, so there is no need
    // to copy it on every tile access.
    const auto& parent_dist = mat_const_.distribution();

    const auto parent_index =
        parent_dist.globalTileIndexFromSubDistribution(origin_, distribution(), index);
    auto tile_sender = mat_const_.read(parent_index);

    const auto parent_tile_size = parent_dist.tileSize(parent_index);
    const auto tile_size = tileSize(index);

    // If the corresponding tile in the parent distribution is exactly the same
    // size as the tile in the sub-distribution, we don't need to take a subtile
    // and can return the tile sender directly. This avoids unnecessary wrapping.
    if (parent_tile_size == tile_size) {
      return tile_sender;
    }

    // Otherwise we have to extract a subtile from the tile in the parent
    // distribution.
    const auto ij_tile =
        parent_dist.tileElementOffsetFromSubDistribution(origin_, distribution(), index);
    return splitTile(std::move(tile_sender), SubTileSpec{ij_tile, tile_size});
  }

private:
  /// Parent matrix the sub-matrix refers to (not owned).
  Matrix<const T, D>& mat_const_;

protected:
  /// Element index in the parent matrix at which the sub-matrix starts.
  GlobalElementIndex origin_;
};

/// Read-write view of a sub-matrix of a @c Matrix<T, D>.
///
/// Extends the read-only @c MatrixRef<const T, D> with readwrite access. It stores a reference to
/// the parent matrix, which must stay alive for as long as this object is used.
template <class T, Device D>
class MatrixRef : public MatrixRef<const T, D> {
public:
  static constexpr Device device = D;

  using ElementType = T;
  using TileType = Tile<ElementType, D>;
  using ConstTileType = Tile<const ElementType, D>;
  using TileDataType = internal::TileData<ElementType, D>;
  using ReadWriteSenderType = ReadWriteTileSender<T, D>;

  /// Create a sub-matrix of @p mat specified by @p spec.
  ///
  /// @param[in] mat is the input matrix,
  /// @param[in] spec contains the origin and size of the new matrix relative to the input matrix,
  /// @pre spec.origin.isValid(),
  /// @pre spec.size.isValid(),
  /// @pre spec.origin + spec.size <= mat.size().
  MatrixRef(Matrix<T, D>& mat, const SubMatrixSpec& spec)
      : MatrixRef<const T, D>(mat, spec), mat_(mat) {}

  // A MatrixRef is bound to one parent matrix for its whole lifetime: no default construction,
  // no copies, no moves.
  MatrixRef() = delete;
  MatrixRef(MatrixRef&&) = delete;
  MatrixRef(const MatrixRef&) = delete;
  MatrixRef& operator=(MatrixRef&&) = delete;
  MatrixRef& operator=(const MatrixRef&) = delete;

  /// Returns a sender of the Tile with local index @p index.
  ///
  /// Note that this overload is declared noexcept while the GlobalTileIndex overload it forwards
  /// to is not: an exception escaping from there terminates the program.
  ///
  /// @pre index.isIn(distribution().localNrTiles()).
  ReadWriteSenderType readwrite(const LocalTileIndex& index) noexcept {
    // Note: this forwards to the overload with GlobalTileIndex which will
    // handle taking a subtile if needed
    return readwrite(this->distribution().globalTileIndex(index));
  }

  /// Returns a sender of the Tile with global index @p index.
  ///
  /// @pre the global tile is stored in the current process,
  /// @pre index.isIn(distribution().nrTiles()).
  ReadWriteSenderType readwrite(const GlobalTileIndex& index) {
    DLAF_ASSERT(index.isIn(this->distribution().nrTiles()), index, this->distribution().nrTiles());

    // Bind the parent distribution by reference: it is only inspected here, so there is no need
    // to copy it on every tile access.
    const auto& parent_dist = mat_.distribution();

    const auto parent_index =
        parent_dist.globalTileIndexFromSubDistribution(origin_, this->distribution(), index);
    auto tile_sender = mat_.readwrite(parent_index);

    const auto parent_tile_size = parent_dist.tileSize(parent_index);
    const auto tile_size = this->tileSize(index);

    // If the corresponding tile in the parent distribution is exactly the same
    // size as the tile in the sub-distribution, we don't need to take a subtile
    // and can return the tile sender directly. This avoids unnecessary wrapping.
    if (parent_tile_size == tile_size) {
      return tile_sender;
    }

    // Otherwise we have to extract a subtile from the tile in the parent
    // distribution.
    const auto ij_tile =
        parent_dist.tileElementOffsetFromSubDistribution(origin_, this->distribution(), index);
    return splitTile(std::move(tile_sender), SubTileSpec{ij_tile, tile_size});
  }

private:
  /// Parent matrix the sub-matrix refers to (not owned).
  Matrix<T, D>& mat_;
  // Make the dependent-base member usable without the this-> prefix.
  using MatrixRef<const T, D>::origin_;
};

// ETI (explicit template instantiation)
//
// DLAF_MATRIX_REF_ETI(KWORD, DATATYPE, DEVICE) expands to explicit instantiations of both
// MatrixRef<DATATYPE, DEVICE> and MatrixRef<const DATATYPE, DEVICE>, each prefixed by KWORD.
// In this header KWORD is `extern`, which turns them into explicit instantiation declarations;
// the same macro can be used with an empty KWORD to emit the matching definitions.

#define DLAF_MATRIX_REF_ETI(KWORD, DATATYPE, DEVICE) \
KWORD template class MatrixRef<DATATYPE, DEVICE>; \
KWORD template class MatrixRef<const DATATYPE, DEVICE>;

DLAF_MATRIX_REF_ETI(extern, float, Device::CPU)
DLAF_MATRIX_REF_ETI(extern, double, Device::CPU)
DLAF_MATRIX_REF_ETI(extern, std::complex<float>, Device::CPU)
DLAF_MATRIX_REF_ETI(extern, std::complex<double>, Device::CPU)

// GPU instantiations are declared only when the DLAF_WITH_GPU macro is defined.
#if defined(DLAF_WITH_GPU)
DLAF_MATRIX_REF_ETI(extern, float, Device::GPU)
DLAF_MATRIX_REF_ETI(extern, double, Device::GPU)
DLAF_MATRIX_REF_ETI(extern, std::complex<float>, Device::GPU)
DLAF_MATRIX_REF_ETI(extern, std::complex<double>, Device::GPU)
#endif
}
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ DLAF_addSublibrary(
init.cpp
matrix/distribution.cpp
matrix/layout_info.cpp
matrix/matrix_ref.cpp
matrix/tile.cpp
matrix.cpp
matrix_mirror.cpp
Expand Down
13 changes: 13 additions & 0 deletions src/matrix/distribution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ Distribution& Distribution::operator=(Distribution&& rhs) noexcept {
return *this;
}

// Builds the sub-distribution of rhs described by spec.
//
// The object is first move-constructed from rhs (taken by value), so it starts with all the
// settings of the input distribution; only the element offset and the size are then changed.
Distribution::Distribution(Distribution rhs, const SubDistributionSpec& spec)
: Distribution(std::move(rhs)) {
// At this point size_ is still the size of the input distribution, so the checks below verify
// that the requested sub-distribution lies inside it.
DLAF_ASSERT(spec.origin.isValid(), spec.origin);
DLAF_ASSERT(spec.size.isValid(), spec.size);
DLAF_ASSERT(spec.origin.row() + spec.size.rows() <= size_.rows(), spec.origin, spec.size, size_);
DLAF_ASSERT(spec.origin.col() + spec.size.cols() <= size_.cols(), spec.origin, spec.size, size_);

// Shift the existing offset by the origin of the sub-distribution and shrink to the new size.
// NOTE(review): the shifted offset_ is not passed through normalizeSourceRankAndOffset();
// confirm that computeGlobalAndLocalNrTilesAndLocalSize() copes with offsets larger than a block.
offset_ = offset_ + sizeFromOrigin(spec.origin);
size_ = spec.size;

// Refresh the members derived from offset_ and size_ (presumably global/local tile counts and
// local size, as the helper name suggests).
computeGlobalAndLocalNrTilesAndLocalSize();
}

void Distribution::computeGlobalSizeForNonDistr() noexcept {
size_ = GlobalElementSize(local_size_.rows(), local_size_.cols());
}
Expand Down
26 changes: 26 additions & 0 deletions src/matrix/matrix_ref.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
//
// Distributed Linear Algebra with Future (DLAF)
//
// Copyright (c) 2018-2023, ETH Zurich
// All rights reserved.
//
// Please, refer to the LICENSE file in the root directory.
// SPDX-License-Identifier: BSD-3-Clause
//

#include <dlaf/matrix/matrix_ref.h>

namespace dlaf::matrix::internal {

// Explicit instantiation definitions of MatrixRef. DLAF_MATRIX_REF_ETI is invoked with an empty
// first argument, so each line expands to `template class MatrixRef<...>;` for both the
// non-const and the const element type.
DLAF_MATRIX_REF_ETI(, float, Device::CPU)
DLAF_MATRIX_REF_ETI(, double, Device::CPU)
DLAF_MATRIX_REF_ETI(, std::complex<float>, Device::CPU)
DLAF_MATRIX_REF_ETI(, std::complex<double>, Device::CPU)

// GPU instantiations are emitted only when the DLAF_WITH_GPU macro is defined.
#if defined(DLAF_WITH_GPU)
DLAF_MATRIX_REF_ETI(, float, Device::GPU)
DLAF_MATRIX_REF_ETI(, double, Device::GPU)
DLAF_MATRIX_REF_ETI(, std::complex<float>, Device::GPU)
DLAF_MATRIX_REF_ETI(, std::complex<double>, Device::GPU)
#endif
}
8 changes: 8 additions & 0 deletions test/unit/matrix/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,14 @@ DLAF_addTest(
MPIRANKS 6
)

DLAF_addTest(
test_matrix_ref
SOURCES test_matrix_ref.cpp
LIBRARIES dlaf.core
USE_MAIN MPIPIKA
MPIRANKS 6
)

DLAF_addTest(
test_panel
SOURCES test_panel.cpp
Expand Down
Loading

0 comments on commit 7114b18

Please sign in to comment.