-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add miniapp triangular multiplication #1029
Merged
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
bd7db61
add new file, copy miniapp_solver's file in miniapp_multiplication's …
29b9dc5
change include in triangular multiplication
7b7b483
add trmm inside miniapps.py
07a1889
remove four lines of code view on gitHub
4ebc181
add trmm file
ea075c3
refactor code
fa98f75
refactor code v.2
5774763
refactor code v3
03e96ac
refactor code v4
ff628b7
definitive
2e86c08
add include
16806f8
refactor code about the name of variables
3073502
refactor
a351087
complete
7788497
change in miniapp_triangular solver blas::Side::Left
8b116f8
remove const auto side = opts.side;
3fc72f4
remove another line of const auto side = opts.side; in miniapp multip…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,288 @@ | ||
// | ||
// Distributed Linear Algebra with Future (DLAF) | ||
// | ||
// Copyright (c) 2018-2023, ETH Zurich | ||
// All rights reserved. | ||
// | ||
// Please, refer to the LICENSE file in the root directory. | ||
// SPDX-License-Identifier: BSD-3-Clause | ||
// | ||
|
||
#include <cstdlib> | ||
#include <iostream> | ||
#include <limits> | ||
#include <type_traits> | ||
|
||
#include <blas/util.hh> | ||
#include <mpi.h> | ||
|
||
#include <pika/init.hpp> | ||
#include <pika/program_options.hpp> | ||
#include <pika/runtime.hpp> | ||
|
||
#include <dlaf/common/format_short.h> | ||
#include <dlaf/common/index2d.h> | ||
#include <dlaf/common/range2d.h> | ||
#include <dlaf/common/timer.h> | ||
#include <dlaf/communication/communicator_grid.h> | ||
#include <dlaf/communication/init.h> | ||
#include <dlaf/init.h> | ||
#include <dlaf/matrix/copy.h> | ||
#include <dlaf/matrix/index.h> | ||
#include <dlaf/matrix/matrix_mirror.h> | ||
#include <dlaf/miniapp/dispatch.h> | ||
#include <dlaf/miniapp/options.h> | ||
#include <dlaf/multiplication/triangular.h> | ||
#include <dlaf/types.h> | ||
#include <dlaf/util_matrix.h> | ||
|
||
namespace { | ||
|
||
using dlaf::Backend; | ||
using dlaf::DefaultDevice_v; | ||
using dlaf::Device; | ||
using dlaf::GlobalElementIndex; | ||
using dlaf::GlobalElementSize; | ||
using dlaf::SizeType; | ||
using dlaf::TileElementSize; | ||
using dlaf::comm::Communicator; | ||
using dlaf::comm::CommunicatorGrid; | ||
using dlaf::common::Ordering; | ||
|
||
struct Options | ||
: dlaf::miniapp::MiniappOptions<dlaf::miniapp::SupportReal::Yes, dlaf::miniapp::SupportComplex::No> { | ||
SizeType m; | ||
SizeType n; | ||
SizeType mb; | ||
SizeType nb; | ||
blas::Side side; | ||
blas::Uplo uplo; | ||
blas::Op op; | ||
blas::Diag diag; | ||
|
||
Options(const pika::program_options::variables_map& vm) | ||
: MiniappOptions(vm), m(vm["m"].as<SizeType>()), n(vm["n"].as<SizeType>()), | ||
mb(vm["mb"].as<SizeType>()), nb(vm["nb"].as<SizeType>()), | ||
side(dlaf::miniapp::parseSide(vm["side"].as<std::string>())), | ||
uplo(dlaf::miniapp::parseUplo(vm["uplo"].as<std::string>())), | ||
op(dlaf::miniapp::parseOp(vm["op"].as<std::string>())), | ||
diag(dlaf::miniapp::parseDiag(vm["diag"].as<std::string>())) { | ||
DLAF_ASSERT(m > 0 && n > 0, m, n); | ||
DLAF_ASSERT(mb > 0 && nb > 0, mb, nb); | ||
|
||
if (do_check != dlaf::miniapp::CheckIterFreq::None) { | ||
std::cerr << "Warning! At the moment result checking it is not implemented." << std::endl; | ||
do_check = dlaf::miniapp::CheckIterFreq::None; | ||
} | ||
} | ||
|
||
Options(Options&&) = default; | ||
Options(const Options&) = default; | ||
Options& operator=(Options&&) = default; | ||
Options& operator=(const Options&) = default; | ||
}; | ||
|
||
template <typename T> | ||
using matrix_values_t = std::function<T(const GlobalElementIndex&)>; | ||
|
||
template <typename T> | ||
using linear_system_t = | ||
std::tuple<matrix_values_t<T>, matrix_values_t<T>, matrix_values_t<T>>; // A, B, X | ||
|
||
template <typename T> | ||
linear_system_t<T> sampleLeftTr(blas::Uplo uplo, blas::Op op, blas::Diag diag, T alpha, SizeType m); | ||
} | ||
|
||
struct triangularMultiplicationMiniapp { | ||
template <dlaf::Backend backend, typename T> | ||
static void run(const Options& opts) { | ||
Communicator world(MPI_COMM_WORLD); | ||
CommunicatorGrid comm_grid(world, opts.grid_rows, opts.grid_cols, Ordering::ColumnMajor); | ||
|
||
// Allocate memory for the matrices | ||
dlaf::matrix::Matrix<T, Device::CPU> ah(GlobalElementSize{opts.m, opts.m}, | ||
TileElementSize{opts.mb, opts.mb}, comm_grid); | ||
dlaf::matrix::Matrix<T, Device::CPU> bh(GlobalElementSize{opts.m, opts.n}, | ||
TileElementSize{opts.mb, opts.nb}, comm_grid); | ||
|
||
dlaf::matrix::MatrixMirror<T, DefaultDevice_v<backend>, Device::CPU> a(ah); | ||
dlaf::matrix::MatrixMirror<T, DefaultDevice_v<backend>, Device::CPU> b(bh); | ||
|
||
auto sync_barrier = [&]() { | ||
a.get().waitLocalTiles(); | ||
b.get().waitLocalTiles(); | ||
DLAF_MPI_CHECK_ERROR(MPI_Barrier(world)); | ||
}; | ||
|
||
const auto side = opts.side; | ||
DLAF_ASSERT(side == blas::Side::Left, side); | ||
|
||
const auto uplo = opts.uplo; | ||
const auto op = opts.op; | ||
const auto diag = opts.diag; | ||
const T alpha = 2.0; | ||
|
||
double m = ah.size().rows(); | ||
double n = bh.size().cols(); | ||
auto add_mul = n * m * m / 2; | ||
const double total_ops = dlaf::total_ops<T>(add_mul, add_mul); | ||
|
||
auto [in_op_a, out_b, in_b] = ::sampleLeftTr(uplo, op, diag, alpha, ah.size().rows()); | ||
|
||
for (int64_t run_index = -opts.nwarmups; run_index < opts.nruns; ++run_index) { | ||
if (0 == world.rank() && run_index >= 0) | ||
std::cout << "[" << run_index << "]" << std::endl; | ||
|
||
// setup matrix A and b | ||
using dlaf::matrix::util::set; | ||
set(ah, in_op_a, op); | ||
set(bh, in_b); | ||
a.copySourceToTarget(); | ||
b.copySourceToTarget(); | ||
|
||
sync_barrier(); | ||
|
||
dlaf::common::Timer<> timeit; | ||
if (opts.local) | ||
dlaf::triangular_multiplication<backend, dlaf::DefaultDevice_v<backend>, T>(side, uplo, op, diag, | ||
alpha, a.get(), | ||
b.get()); | ||
else | ||
dlaf::triangular_multiplication<backend, dlaf::DefaultDevice_v<backend>, T>( | ||
comm_grid, side, uplo, op, diag, alpha, a.get(), b.get()); | ||
|
||
sync_barrier(); | ||
|
||
// benchmark results | ||
if (0 == world.rank() && run_index >= 0) { | ||
auto elapsed_time = timeit.elapsed(); | ||
double gigaflops = total_ops / elapsed_time / 1e9; | ||
|
||
std::cout << "[" << run_index << "]" | ||
<< " " << elapsed_time << "s" | ||
<< " " << gigaflops << "GFlop/s" | ||
<< " " << dlaf::internal::FormatShort{opts.type} | ||
<< dlaf::internal::FormatShort{opts.side} << dlaf::internal::FormatShort{opts.uplo} | ||
<< dlaf::internal::FormatShort{opts.op} << dlaf::internal::FormatShort{opts.diag} | ||
<< " " << bh.size() << " " << bh.blockSize() << " " << comm_grid.size() << " " | ||
<< pika::get_os_thread_count() << " " << backend << std::endl; | ||
} | ||
|
||
b.copyTargetToSource(); | ||
|
||
// (optional) run test | ||
if ((opts.do_check == dlaf::miniapp::CheckIterFreq::Last && run_index == (opts.nruns - 1)) || | ||
opts.do_check == dlaf::miniapp::CheckIterFreq::All) { | ||
DLAF_UNIMPLEMENTED("Check"); | ||
} | ||
} | ||
} | ||
}; | ||
|
||
int pika_main(pika::program_options::variables_map& vm) { | ||
pika::scoped_finalize pika_finalizer; | ||
dlaf::ScopedInitializer init(vm); | ||
|
||
const Options opts(vm); | ||
dlaf::miniapp::dispatchMiniapp<triangularMultiplicationMiniapp>(opts); | ||
|
||
return EXIT_SUCCESS; | ||
} | ||
|
||
int main(int argc, char** argv) { | ||
dlaf::comm::mpi_init mpi_initter(argc, argv); | ||
|
||
// options | ||
using namespace pika::program_options; | ||
options_description desc_commandline( | ||
"Benchmark computation of A . B, " | ||
"where A is a non-unit lower triangular matrix, and B is an m by n matrix\n\n" | ||
"options\n" | ||
"Usage: miniapp_triangular_multiplication [options]"); | ||
desc_commandline.add(dlaf::miniapp::getMiniappOptionsDescription()); | ||
desc_commandline.add(dlaf::getOptionsDescription()); | ||
|
||
// clang-format off | ||
desc_commandline.add_options() | ||
("m", value<SizeType>() ->default_value(4096), "Matrix b rows") | ||
("n", value<SizeType>() ->default_value(512), "Matrix b columns") | ||
("mb", value<SizeType>() ->default_value(256), "Matrix b block rows") | ||
("nb", value<SizeType>() ->default_value(512), "Matrix b block columns") | ||
; | ||
// clang-format on | ||
dlaf::miniapp::addSideOption(desc_commandline); | ||
dlaf::miniapp::addUploOption(desc_commandline); | ||
dlaf::miniapp::addOpOption(desc_commandline); | ||
dlaf::miniapp::addDiagOption(desc_commandline); | ||
|
||
pika::init_params p; | ||
p.desc_cmdline = desc_commandline; | ||
p.rp_callback = dlaf::initResourcePartitionerHandler; | ||
return pika::init(pika_main, argc, argv, p); | ||
} | ||
|
||
namespace { | ||
/// Returns a tuple of element generators of three matrices A(m x m), B (m x n), X (m x n), for which it | ||
/// holds op(A) X = alpha B (alpha can be any value). | ||
rasolca marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// | ||
/// The elements of op(A) (@p el_op_a) are chosen such that: | ||
/// op(A)_ik = (i+1) / (k+.5) * exp(I*(2*i-k)) for the referenced elements | ||
/// op(A)_ik = -9.9 otherwise, | ||
/// where I = 0 for real types or I is the complex unit for complex types. | ||
/// | ||
/// The elements of X (@p el_x) are computed as | ||
/// X_kj = (k+.5) / (j+2) * exp(I*(k+j)). | ||
/// These data are typically used to check whether the result of the equation | ||
/// performed with any algorithm is consistent with the computed values. | ||
/// | ||
/// Finally, the elements of B (@p el_b) should be: | ||
/// B_ij = (Sum_k op(A)_ik * X_kj) / alpha | ||
/// = (op(A)_ii * X_ij + (kk-1) * gamma) / alpha, | ||
/// where gamma = (i+1) / (j+2) * exp(I*(2*i+j)), | ||
/// kk = i+1 if op(a) is an lower triangular matrix, or | ||
/// kk = m-i if op(a) is an lower triangular matrix. | ||
/// Therefore | ||
/// B_ij = (X_ij + (kk-1) * gamma) / alpha, if diag == Unit | ||
/// B_ij = kk * gamma / alpha, otherwise. | ||
template <typename T> | ||
linear_system_t<T> sampleLeftTr(blas::Uplo uplo, blas::Op op, blas::Diag diag, T alpha, SizeType m) { | ||
Comment on lines
+247
to
+248
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note to reviewers: This is code duplication, but it will disappear in favor of random matrices to be able to:
|
||
static_assert(std::is_arithmetic_v<T> && !std::is_integral_v<T>, | ||
"it is valid just with floating point values"); | ||
|
||
bool op_a_lower = (uplo == blas::Uplo::Lower && op == blas::Op::NoTrans) || | ||
(uplo == blas::Uplo::Upper && op != blas::Op::NoTrans); | ||
|
||
auto el_op_a = [op_a_lower, diag](const GlobalElementIndex& index) -> T { | ||
if ((op_a_lower && index.row() < index.col()) || (!op_a_lower && index.row() > index.col()) || | ||
(diag == blas::Diag::Unit && index.row() == index.col())) | ||
return static_cast<T>(-9.9); | ||
|
||
const T i = index.row(); | ||
const T k = index.col(); | ||
|
||
return (i + static_cast<T>(1)) / (k + static_cast<T>(.5)); | ||
}; | ||
|
||
auto el_x = [](const GlobalElementIndex& index) -> T { | ||
const T k = index.row(); | ||
const T j = index.col(); | ||
|
||
return (k + static_cast<T>(.5)) / (j + static_cast<T>(2)); | ||
}; | ||
|
||
auto el_b = [m, alpha, diag, op_a_lower, el_x](const GlobalElementIndex& index) -> T { | ||
const dlaf::BaseType<T> kk = op_a_lower ? index.row() + 1 : m - index.row(); | ||
|
||
const T i = index.row(); | ||
const T j = index.col(); | ||
const T gamma = (i + 1) / (j + 2); | ||
if (diag == blas::Diag::Unit) | ||
return ((kk - 1) * gamma + el_x(index)) / alpha; | ||
else | ||
return kk * gamma / alpha; | ||
}; | ||
|
||
return {el_op_a, el_b, el_x}; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/usr/bin/env python3 | ||
# coding: utf-8 | ||
|
||
# | ||
# Distributed Linear Algebra with Future (DLAF) | ||
# | ||
# Copyright (c) 2018-2023, ETH Zurich | ||
# All rights reserved. | ||
# | ||
# Please, refer to the LICENSE file in the root directory. | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# | ||
|
||
import postprocess as pp | ||
|
||
df = pp.parse_jobs_cmdargs(description="Plot trmm strong scaling benchmarks.") | ||
|
||
df_grp = pp.calc_trmm_metrics(df) | ||
pp.gen_trmm_plots_strong(df_grp) | ||
pp.gen_trmm_plots_strong(df_grp, combine_mb=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/usr/bin/env python3 | ||
# coding: utf-8 | ||
|
||
# | ||
# Distributed Linear Algebra with Future (DLAF) | ||
# | ||
# Copyright (c) 2018-2023, ETH Zurich | ||
# All rights reserved. | ||
# | ||
# Please, refer to the LICENSE file in the root directory. | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# | ||
|
||
import postprocess as pp | ||
|
||
df = pp.parse_jobs_cmdargs(description="Plot trmm weak scaling benchmarks.") | ||
|
||
df_grp = pp.calc_trmm_metrics(df) | ||
pp.gen_trmm_plots_weak(df_grp, 1024, logx=True) | ||
pp.gen_trmm_plots_weak(df_grp, 1024, logx=True, combine_mb=True) |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to assert the value of this parameter, given that we have
sampleLeftTr
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it is needed here and in the triangular solver miniapp as well. Currently the matrix a size is (m x m), so Side::Right fails unless m == n.