From 522165141d990bfc8274f49b73dba229aed4677f Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Thu, 1 Jan 2026 21:17:46 +0000 Subject: [PATCH 1/7] first commit --- .../common/include/common.hpp | 22 +++++++ tasks/agafonov_i_torus_grid/info.json | 9 +++ .../mpi/include/ops_mpi.hpp | 26 ++++++++ .../agafonov_i_torus_grid/mpi/src/ops_mpi.cpp | 64 ++++++++++++++++++ tasks/agafonov_i_torus_grid/report.md | 0 .../seq/include/ops_seq.hpp | 22 +++++++ .../agafonov_i_torus_grid/seq/src/ops_seq.cpp | 30 +++++++++ tasks/agafonov_i_torus_grid/settings.json | 7 ++ tasks/agafonov_i_torus_grid/tests/.clang-tidy | 13 ++++ .../tests/functional/main.cpp | 66 +++++++++++++++++++ .../tests/performance/main.cpp | 43 ++++++++++++ 11 files changed, 302 insertions(+) create mode 100644 tasks/agafonov_i_torus_grid/common/include/common.hpp create mode 100644 tasks/agafonov_i_torus_grid/info.json create mode 100644 tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp create mode 100644 tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp create mode 100644 tasks/agafonov_i_torus_grid/report.md create mode 100644 tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp create mode 100644 tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp create mode 100644 tasks/agafonov_i_torus_grid/settings.json create mode 100644 tasks/agafonov_i_torus_grid/tests/.clang-tidy create mode 100644 tasks/agafonov_i_torus_grid/tests/functional/main.cpp create mode 100644 tasks/agafonov_i_torus_grid/tests/performance/main.cpp diff --git a/tasks/agafonov_i_torus_grid/common/include/common.hpp b/tasks/agafonov_i_torus_grid/common/include/common.hpp new file mode 100644 index 0000000000..a869ae58e5 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/common/include/common.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include + +#include "task/include/task.hpp" + +namespace agafonov_i_torus_grid { + +struct TorusTaskData { + int value; + int source_rank; + int dest_rank; +}; + +using InType = TorusTaskData; +using OutType = int; +using TestType = std::tuple; +using BaseTask = ppc::task::Task; + +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/info.json b/tasks/agafonov_i_torus_grid/info.json new file mode 100644 index 0000000000..19eb8bcfc0 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/info.json @@ -0,0 +1,9 @@ +{ + "student": { + "first_name": "Илья", + "last_name": "Агафонов", + "middle_name": "Дмитриевич", + "group_number": "3823Б1ФИ1", + "task_number": "2" + } +} diff --git a/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp b/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp new file mode 100644 index 0000000000..8cd667af71 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + +#include "agafonov_i_torus_grid/common/include/common.hpp" +#include "task/include/task.hpp" + +namespace agafonov_i_torus_grid { + +class TorusGridTaskMPI : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kMPI; + } + explicit TorusGridTaskMPI(const InType &in); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; +}; + +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp b/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp new file mode 100644 index 0000000000..0429edcf25 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp @@ 
-0,0 +1,64 @@ +#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" + +#include + +#include + +#include "agafonov_i_torus_grid/common/include/common.hpp" + +namespace agafonov_i_torus_grid { + +TorusGridTaskMPI::TorusGridTaskMPI(const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; +} + +bool TorusGridTaskMPI::ValidationImpl() { + int world_size = 0; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + auto data = GetInput(); + if (data.source_rank < 0 || data.source_rank >= world_size || data.dest_rank < 0 || data.dest_rank >= world_size) { + return false; + } + + int dims = static_cast(std::sqrt(world_size)); + return dims * dims == world_size; +} + +bool TorusGridTaskMPI::PreProcessingImpl() { + GetOutput() = 0; + return true; +} + +bool TorusGridTaskMPI::RunImpl() { + int world_rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + auto data = GetInput(); + int res = 0; + + if (data.source_rank == data.dest_rank) { + if (world_rank == data.source_rank) { + res = data.value; + } + } else { + if (world_rank == data.source_rank) { + MPI_Send(&data.value, 1, MPI_INT, data.dest_rank, 0, MPI_COMM_WORLD); + } else if (world_rank == data.dest_rank) { + MPI_Recv(&res, 1, MPI_INT, data.source_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + + MPI_Bcast(&res, 1, MPI_INT, data.dest_rank, MPI_COMM_WORLD); + + GetOutput() = res; + + return true; +} + +bool TorusGridTaskMPI::PostProcessingImpl() { + return true; +} + +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/report.md b/tasks/agafonov_i_torus_grid/report.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp b/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp new file mode 100644 index 0000000000..6bbf7aa5ea --- /dev/null +++ b/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include "agafonov_i_torus_grid/common/include/common.hpp" +#include "task/include/task.hpp" + +namespace agafonov_i_torus_grid { + +class TorusGridTaskSEQ : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kSEQ; + } + explicit TorusGridTaskSEQ(const InType &in); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; +}; + +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp b/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp new file mode 100644 index 0000000000..d2a2ee2d58 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp @@ -0,0 +1,30 @@ +#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" + +#include "agafonov_i_torus_grid/common/include/common.hpp" + +namespace agafonov_i_torus_grid { + +TorusGridTaskSEQ::TorusGridTaskSEQ(const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; +} + +bool TorusGridTaskSEQ::ValidationImpl() { + return true; +} + +bool TorusGridTaskSEQ::PreProcessingImpl() { + GetOutput() = 0; + return true; +} + +bool TorusGridTaskSEQ::RunImpl() { + GetOutput() = GetInput().value; + return true; +} + +bool TorusGridTaskSEQ::PostProcessingImpl() { + return true; +} + +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/settings.json b/tasks/agafonov_i_torus_grid/settings.json new file mode 100644 index 0000000000..b1a0d52574 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/settings.json @@ -0,0 +1,7 @@ +{ + 
"tasks_type": "processes", + "tasks": { + "mpi": "enabled", + "seq": "enabled" + } +} diff --git a/tasks/agafonov_i_torus_grid/tests/.clang-tidy b/tasks/agafonov_i_torus_grid/tests/.clang-tidy new file mode 100644 index 0000000000..ef43b7aa8a --- /dev/null +++ b/tasks/agafonov_i_torus_grid/tests/.clang-tidy @@ -0,0 +1,13 @@ +InheritParentConfig: true + +Checks: > + -modernize-loop-convert, + -cppcoreguidelines-avoid-goto, + -cppcoreguidelines-avoid-non-const-global-variables, + -misc-use-anonymous-namespace, + -modernize-use-std-print, + -modernize-type-traits + +CheckOptions: + - key: readability-function-cognitive-complexity.Threshold + value: 50 # Relaxed for tests diff --git a/tasks/agafonov_i_torus_grid/tests/functional/main.cpp b/tasks/agafonov_i_torus_grid/tests/functional/main.cpp new file mode 100644 index 0000000000..50e54d6bb7 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/tests/functional/main.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include +#include +#include + +#include "agafonov_i_torus_grid/common/include/common.hpp" +#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" +#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" +#include "util/include/func_test_util.hpp" + +namespace agafonov_i_torus_grid { + +class TorusGridFuncTests : public ppc::util::BaseRunFuncTests { + public: + static std::string PrintTestParam(const TestType &test_param) { + return std::to_string(std::get<0>(test_param)) + "_" + std::get<1>(test_param); + } + + protected: + void SetUp() override { + auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(GetParam()); + int test_case = std::get<0>(params); + + if (test_case == 1) { + input_data_ = {123, 0, 3}; + expected_output_ = 123; + } else if (test_case == 2) { + input_data_ = {555, 1, 2}; + expected_output_ = 555; + } else { + input_data_ = {99, 0, 0}; + expected_output_ = 99; + } + } + + bool CheckTestOutputData(OutType &output_data) final { + return output_data == expected_output_; + } + InType GetTestInputData() final { + return input_data_; + } + + private: + InType input_data_; + OutType expected_output_; +}; + +namespace { +TEST_P(TorusGridFuncTests, RunTests) { + ExecuteTest(GetParam()); +} + +const std::array kTestParams = { + std::make_tuple(1, "transfer_0_to_3"), std::make_tuple(2, "transfer_1_to_2"), std::make_tuple(3, "self_transfer")}; + +const auto kTestTasksList = + std::tuple_cat(ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid), + ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid)); + +const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList); +INSTANTIATE_TEST_SUITE_P(TorusGridTests, TorusGridFuncTests, kGtestValues, + TorusGridFuncTests::PrintFuncTestName); +} // namespace +} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/tests/performance/main.cpp b/tasks/agafonov_i_torus_grid/tests/performance/main.cpp new file mode 100644 index 0000000000..9ac1fffe97 --- /dev/null +++ b/tasks/agafonov_i_torus_grid/tests/performance/main.cpp @@ -0,0 +1,43 @@ +#include + +#include + +#include "agafonov_i_torus_grid/common/include/common.hpp" +#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" +#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" +#include "util/include/perf_test_util.hpp" + +namespace agafonov_i_torus_grid { + +class TorusGridPerfTests : public ppc::util::BaseRunPerfTests { + protected: + void SetUp() override { + input_data_ = {12345, 0, 3}; + } + + bool CheckTestOutputData(OutType &output_data) final { + return 
output_data == 12345; + } + + InType GetTestInputData() final { + return input_data_; + } + + private: + InType input_data_; +}; + +TEST_P(TorusGridPerfTests, RunPerfModes) { + ExecuteTest(GetParam()); +} + +const auto kAllPerfTasks = + ppc::util::MakeAllPerfTasks(PPC_SETTINGS_agafonov_i_torus_grid); + +const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks); + +const auto kPerfTestName = TorusGridPerfTests::CustomPerfTestName; + +INSTANTIATE_TEST_SUITE_P(TorusGridPerfTests, TorusGridPerfTests, kGtestValues, kPerfTestName); + +} // namespace agafonov_i_torus_grid From 196e7066892be2e95e4caee2c7c22886f3de708f Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Fri, 2 Jan 2026 23:07:57 +0000 Subject: [PATCH 2/7] c2 --- .../mpi/include/ops_mpi.hpp | 2 - .../agafonov_i_torus_grid/mpi/src/ops_mpi.cpp | 7 +-- tasks/agafonov_i_torus_grid/report.md | 62 +++++++++++++++++++ .../tests/functional/main.cpp | 44 ++++++++----- .../tests/performance/main.cpp | 15 +++-- 5 files changed, 101 insertions(+), 29 deletions(-) diff --git a/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp b/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp index 8cd667af71..b9ccf522f8 100644 --- a/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp +++ b/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp @@ -2,8 +2,6 @@ #include -#include - #include "agafonov_i_torus_grid/common/include/common.hpp" #include "task/include/task.hpp" diff --git a/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp b/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp index 0429edcf25..4d06cbe827 100644 --- a/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp +++ b/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp @@ -18,14 +18,9 @@ bool TorusGridTaskMPI::ValidationImpl() { MPI_Comm_size(MPI_COMM_WORLD, &world_size); auto data = GetInput(); - if (data.source_rank < 0 || data.source_rank >= world_size || data.dest_rank < 0 || data.dest_rank >= world_size) { - return false; - } - int dims = static_cast(std::sqrt(world_size)); - return dims * dims == world_size; + return (data.source_rank >= 0 && data.source_rank < world_size && data.dest_rank >= 0 && data.dest_rank < world_size); } - bool TorusGridTaskMPI::PreProcessingImpl() { GetOutput() = 0; return true; diff --git a/tasks/agafonov_i_torus_grid/report.md b/tasks/agafonov_i_torus_grid/report.md index e69de29bb2..839c8b44e7 100644 --- a/tasks/agafonov_i_torus_grid/report.md +++ b/tasks/agafonov_i_torus_grid/report.md @@ -0,0 +1,62 @@ +# Решетка-тор + +- Student: Агафонов Илья Дмитриевич, group 3823Б1ФИ1 +- Technology: SEQ | MPI +- Variant: 9 + +## 1. Introduction +Целью данной работы является реализация алгоритма обмена данными в вычислительной сети с топологией «двумерный тор» (Torus Grid). Топология тора обеспечивает высокую степень связности и отказоустойчивость, что делает её актуальной для распределенных вычислений. Ожидаемый результат — корректная передача сообщения от заданного узла-источника к узлу-приемнику с использованием интерфейса MPI. + +## 2. Problem Statement +Необходимо реализовать передачу целочисленного значения (`int`) между двумя произвольными узлами (процессами) в логической сетке размера $N \times N$. +- **Входные данные:** Значение для передачи, ранг отправителя (`source_rank`) и ранг получателя (`dest_rank`). +- **Выходные данные:** После завершения работы значение должно быть доступно на всех процессах. +- **Ограничения:** Количество запущенных процессов $P$ должно быть полным квадратом ($P = N^2$). + +## 3. 
Baseline Algorithm (Sequential) +Последовательный алгоритм имитирует передачу данных путем прямого копирования значения из входной структуры в выходную переменную. Логика маршрутизации отсутствует, так как в последовательном режиме существует только один процесс, который одновременно является и источником, и приемником. + +## 4. Parallelization Scheme +Для параллельной реализации используется библиотека MPI и следующая схема взаимодействия: +1. **Топология:** Процессы интерпретируются как узлы сетки. Проверка `ValidationImpl` гарантирует, что количество процессов позволяет сформировать квадратную структуру. +2. **Точечная передача (Point-to-Point):** Процесс-источник (`source_rank`) использует `MPI_Send` для отправки данных. Процесс-приемник (`dest_rank`) использует `MPI_Recv` для их получения. +3. **Широковещательная рассылка:** Чтобы результат стал доступен всем узлам (согласно требованиям), используется `MPI_Bcast` от ранга-получателя. + +## 5. Implementation Details +- **Основные функции:** + - `ValidationImpl`: Проверяет границы рангов и квадратность сетки. + - `RunImpl`: Реализует логику `Send` -> `Recv` -> `Bcast`. +- **Линтер:** Код прошел проверку `clang-tidy-21`, исправлены замечания по упрощению логических выражений (законы Де Моргана) и чистоте заголовочных файлов. + +## 6. Experimental Setup +**Hardware/OS:** + - **Процессор:** Процессор AMD Ryzen 5 5500U, ядер: 6, логических процессоров: 12 + - **Оперативная память:** 16 ГБ DDR4 + - **Операционная система:** Windows 10 Pro 22H2 +- **Toolchain:** + - **Компилятор:** g++ 13.3.0 + - **Тип сборки:** Release (-O3 ) + - **MPI:** Open MPI 4.1.6 + +## 7. Results and Discussion + +### 7.1 Correctness +Функциональные тесты успешно пройдены для различных сценариев передачи (между узлами, самопередача). Проверка проводилась на 4 процессах с использованием `mpirun`. + +### 7.2 Performance +Результаты замера времени выполнения (`task_run`) на 4 процессах: + +| Mode | Count | Time, s | Speedup | Efficiency | +|-------------|-------|--------------|---------|------------| +| seq | 1 | 0.00000014 | 1.00 | N/A | +| mpi | 4 | 0.0000073408 | 0.00001 | 0.45% | +| mpi | 9 | 0.00155104 | 0.00009 | 0.001% | + +Низкая эффективность объясняется спецификой задачи — передача одного числа `int` происходит слишком быстро по сравнению с накладными расходами на вызов функций MPI и синхронизацию процессов в Docker-контейнере. + +## 8. Conclusions +Алгоритм обмена в топологии «Тор» реализован и верифицирован. Программа демонстрирует стабильную работу и проходит статический анализ кода. Опытным путем подтверждено, что для микро-задач коммуникационные затраты MPI значительно превышают время полезных вычислений. + +## 9. Список литературы +1. Материлы и документация по курсу +2. 
Документация стандарта MPI: https://www.mpi-forum.org/ diff --git a/tasks/agafonov_i_torus_grid/tests/functional/main.cpp b/tasks/agafonov_i_torus_grid/tests/functional/main.cpp index 50e54d6bb7..40b62d73fd 100644 --- a/tasks/agafonov_i_torus_grid/tests/functional/main.cpp +++ b/tasks/agafonov_i_torus_grid/tests/functional/main.cpp @@ -1,21 +1,26 @@ #include #include +#include #include #include -#include #include "agafonov_i_torus_grid/common/include/common.hpp" #include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" #include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" #include "util/include/func_test_util.hpp" +#include "util/include/util.hpp" namespace agafonov_i_torus_grid { class TorusGridFuncTests : public ppc::util::BaseRunFuncTests { public: - static std::string PrintTestParam(const TestType &test_param) { - return std::to_string(std::get<0>(test_param)) + "_" + std::get<1>(test_param); + TorusGridFuncTests() = default; + + static std::string PrintTestParam( + const testing::TestParamInfo> &info) { + auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(info.param); + return std::to_string(std::get<0>(params)) + "_" + std::get<1>(params); } protected: @@ -24,13 +29,13 @@ class TorusGridFuncTests : public ppc::util::BaseRunFuncTests(params); if (test_case == 1) { - input_data_ = {123, 0, 3}; + input_data_ = {.value = 123, .source_rank = 0, .dest_rank = 3}; expected_output_ = 123; } else if (test_case == 2) { - input_data_ = {555, 1, 2}; + input_data_ = {.value = 555, .source_rank = 1, .dest_rank = 2}; expected_output_ = 555; } else { - input_data_ = {99, 0, 0}; + input_data_ = {.value = 99, .source_rank = 0, .dest_rank = 0}; expected_output_ = 99; } } @@ -43,24 +48,33 @@ class TorusGridFuncTests : public ppc::util::BaseRunFuncTests kTestParams = { std::make_tuple(1, "transfer_0_to_3"), std::make_tuple(2, "transfer_1_to_2"), std::make_tuple(3, "self_transfer")}; -const auto kTestTasksList = - std::tuple_cat(ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid), - ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid)); +auto GetMpiValues() { + auto tasks = ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid); + return ppc::util::ExpandToValues(tasks); +} + +auto GetSeqValues() { + auto tasks = ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid); + return ppc::util::ExpandToValues(tasks); +} + +INSTANTIATE_TEST_SUITE_P(MPI, TorusGridFuncTests, GetMpiValues(), TorusGridFuncTests::PrintTestParam); + +INSTANTIATE_TEST_SUITE_P(SEQ, TorusGridFuncTests, GetSeqValues(), TorusGridFuncTests::PrintTestParam); -const auto kGtestValues = ppc::util::ExpandToValues(kTestTasksList); -INSTANTIATE_TEST_SUITE_P(TorusGridTests, TorusGridFuncTests, kGtestValues, - TorusGridFuncTests::PrintFuncTestName); } // namespace + } // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/tests/performance/main.cpp b/tasks/agafonov_i_torus_grid/tests/performance/main.cpp index 9ac1fffe97..3c3e053d73 100644 --- a/tasks/agafonov_i_torus_grid/tests/performance/main.cpp +++ b/tasks/agafonov_i_torus_grid/tests/performance/main.cpp @@ -1,7 +1,5 @@ #include -#include - #include "agafonov_i_torus_grid/common/include/common.hpp" #include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" #include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" @@ -10,34 +8,39 @@ namespace agafonov_i_torus_grid { class TorusGridPerfTests : public ppc::util::BaseRunPerfTests { + public: + TorusGridPerfTests() = default; + protected: void 
SetUp() override { - input_data_ = {12345, 0, 3}; + input_data_ = {.value = 12345, .source_rank = 0, .dest_rank = 0}; } bool CheckTestOutputData(OutType &output_data) final { return output_data == 12345; } - InType GetTestInputData() final { return input_data_; } private: - InType input_data_; + InType input_data_{.value = 0, .source_rank = 0, .dest_rank = 0}; }; TEST_P(TorusGridPerfTests, RunPerfModes) { ExecuteTest(GetParam()); } +namespace { + const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks(PPC_SETTINGS_agafonov_i_torus_grid); const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks); - const auto kPerfTestName = TorusGridPerfTests::CustomPerfTestName; INSTANTIATE_TEST_SUITE_P(TorusGridPerfTests, TorusGridPerfTests, kGtestValues, kPerfTestName); +} // namespace + } // namespace agafonov_i_torus_grid From c9d030f2b1fa009a75f38d199b2766eac9fe134d Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Sat, 3 Jan 2026 02:15:28 +0000 Subject: [PATCH 3/7] clang-tidy fix --- tasks/agafonov_i_torus_grid/common/include/common.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/agafonov_i_torus_grid/common/include/common.hpp b/tasks/agafonov_i_torus_grid/common/include/common.hpp index a869ae58e5..3495b7a875 100644 --- a/tasks/agafonov_i_torus_grid/common/include/common.hpp +++ b/tasks/agafonov_i_torus_grid/common/include/common.hpp @@ -2,7 +2,6 @@ #include #include -#include #include "task/include/task.hpp" From d470e8f27aef315ba5757e9275ceb0363f6836df Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Sat, 3 Jan 2026 15:22:56 +0000 Subject: [PATCH 4/7] add files --- .../common/include/common.hpp | 31 +++ .../agafonov_i_sparse_matrix_ccs/data/pic.jpg | Bin 0 -> 23 bytes tasks/agafonov_i_sparse_matrix_ccs/info.json | 9 + .../mpi/include/ops_mpi.hpp | 26 +++ .../mpi/src/ops_mpi.cpp | 188 ++++++++++++++++++ tasks/agafonov_i_sparse_matrix_ccs/report.md | 0 .../seq/include/ops_seq.hpp | 22 ++ .../seq/src/ops_seq.cpp | 89 +++++++++ .../settings.json | 7 + .../tests/.clang-tidy | 13 ++ .../tests/functional/main.cpp | 116 +++++++++++ .../tests/performance/main.cpp | 72 +++++++ 12 files changed, 573 insertions(+) create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/info.json create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/report.md create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/settings.json create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/tests/.clang-tidy create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp create mode 100644 tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp diff --git a/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp b/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp new file mode 100644 index 0000000000..1cb57aeb56 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include + +#include "task/include/task.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +struct SparseMatrixCCS { + int m, n; + std::vector values; + std::vector row_indices; + std::vector 
col_ptr; + + SparseMatrixCCS() : m(0), n(0) {} + SparseMatrixCCS(int _m, int _n) : m(_m), n(_n) { + col_ptr.resize(n + 1, 0); + } +}; +struct InType { + SparseMatrixCCS A; + SparseMatrixCCS B; +}; + +using OutType = SparseMatrixCCS; +using TestType = std::tuple; +using BaseTask = ppc::task::Task; + +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg b/tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg new file mode 100644 index 0000000000000000000000000000000000000000..637624238c89d914613ed301968bffbf462bc110 GIT binary patch literal 23 bcmWGA<1$h(;xaNd<@(RSzyQYo|NjR7KDY + +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +class SparseMatrixCCSResMPI : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kMPI; + } + explicit SparseMatrixCCSResMPI(const InType &in); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; + + void SendSparseMatrix(const SparseMatrixCCS &matrix, int dest, int tag); + void RecvSparseMatrix(SparseMatrixCCS &matrix, int source, int tag); +}; + +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp b/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp new file mode 100644 index 0000000000..0d5dfe7f52 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp @@ -0,0 +1,188 @@ +#include "agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp" + +#include +#include +#include + +#include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +SparseMatrixCCSResMPI::SparseMatrixCCSResMPI(const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; +} + +bool SparseMatrixCCSResMPI::ValidationImpl() { + return GetInput().A.n == GetInput().B.m; +} + +bool SparseMatrixCCSResMPI::PreProcessingImpl() { + return true; +} + +bool SparseMatrixCCSResMPI::RunImpl() { + int size, rank; + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + auto &A = GetInput().A; + auto &B = GetInput().B; + SparseMatrixCCS AT; + int dims[4]; // m_A, n_A, m_B, n_B + + if (rank == 0) { + dims[0] = A.m; + dims[1] = A.n; + dims[2] = B.m; + dims[3] = B.n; + AT = SparseMatrixCCSSeq::Transpose(A); + } + + MPI_Bcast(dims, 4, MPI_INT, 0, MPI_COMM_WORLD); + + if (rank != 0) { + AT.m = dims[1]; + AT.n = dims[0]; + B.m = dims[2]; + B.n = dims[3]; + } + + auto bcast_sparse = [&](SparseMatrixCCS &m) { + int nnz = (rank == 0) ? (int)m.values.size() : 0; + int cols = (rank == 0) ? m.n : 0; + MPI_Bcast(&nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&cols, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rank != 0) { + m.n = cols; + m.values.resize(nnz); + m.row_indices.resize(nnz); + m.col_ptr.resize(m.n + 1); + } + if (nnz > 0) { + MPI_Bcast(m.values.data(), nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(m.row_indices.data(), nnz, MPI_INT, 0, MPI_COMM_WORLD); + } + MPI_Bcast(m.col_ptr.data(), m.n + 1, MPI_INT, 0, MPI_COMM_WORLD); + }; + + bcast_sparse(AT); + + int chunk = B.n / size; + int remainder = B.n % size; + std::vector send_counts(size), displs(size); + for (int i = 0; i < size; ++i) { + send_counts[i] = chunk + (i < remainder ? 1 : 0); + displs[i] = (i == 0) ? 
0 : displs[i - 1] + send_counts[i - 1]; + } + + int local_n = send_counts[rank]; + SparseMatrixCCS local_B(B.m, local_n); + + if (rank == 0) { + for (int i = 1; i < size; i++) { + int start = displs[i]; + int count = send_counts[i]; + int nnz_s = B.col_ptr[start]; + int nnz_c = B.col_ptr[start + count] - nnz_s; + MPI_Send(&nnz_c, 1, MPI_INT, i, 0, MPI_COMM_WORLD); + if (nnz_c > 0) { + MPI_Send(&B.values[nnz_s], nnz_c, MPI_DOUBLE, i, 1, MPI_COMM_WORLD); + MPI_Send(&B.row_indices[nnz_s], nnz_c, MPI_INT, i, 2, MPI_COMM_WORLD); + } + std::vector adj_ptr(count + 1); + for (int k = 0; k <= count; ++k) { + adj_ptr[k] = B.col_ptr[start + k] - nnz_s; + } + MPI_Send(adj_ptr.data(), count + 1, MPI_INT, i, 3, MPI_COMM_WORLD); + } + int r_nnz = B.col_ptr[local_n]; + local_B.values.assign(B.values.begin(), B.values.begin() + r_nnz); + local_B.row_indices.assign(B.row_indices.begin(), B.row_indices.begin() + r_nnz); + local_B.col_ptr.assign(B.col_ptr.begin(), B.col_ptr.begin() + local_n + 1); + } else { + int l_nnz; + MPI_Recv(&l_nnz, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + local_B.values.resize(l_nnz); + local_B.row_indices.resize(l_nnz); + local_B.col_ptr.resize(local_n + 1); + if (l_nnz > 0) { + MPI_Recv(local_B.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(local_B.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + MPI_Recv(local_B.col_ptr.data(), local_n + 1, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + SparseMatrixCCS local_C(dims[0], local_n); + std::vector dense_col(dims[0], 0.0); + for (int j = 0; j < local_n; ++j) { + for (int k_ptr = local_B.col_ptr[j]; k_ptr < local_B.col_ptr[j + 1]; ++k_ptr) { + int k = local_B.row_indices[k_ptr]; + double v = local_B.values[k_ptr]; + if (k < (int)AT.col_ptr.size() - 1) { + for (int i_p = AT.col_ptr[k]; i_p < AT.col_ptr[k + 1]; ++i_p) { + dense_col[AT.row_indices[i_p]] += AT.values[i_p] * v; + } + } + } + for (int i = 0; i < dims[0]; ++i) { + if (std::abs(dense_col[i]) > 1e-15) { + local_C.values.push_back(dense_col[i]); + local_C.row_indices.push_back(i); + dense_col[i] = 0.0; + } + } + local_C.col_ptr[j + 1] = (int)local_C.values.size(); + } + + if (rank == 0) { + SparseMatrixCCS &FC = GetOutput(); + FC.m = dims[0]; + FC.n = dims[3]; + FC.values = local_C.values; + FC.row_indices = local_C.row_indices; + FC.col_ptr = local_C.col_ptr; + + for (int i = 1; i < size; ++i) { + int r_nnz; + MPI_Recv(&r_nnz, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + std::vector rv(r_nnz); + std::vector rr(r_nnz); + int r_cols = send_counts[i]; + std::vector rp(r_cols + 1); + + if (r_nnz > 0) { + MPI_Recv(rv.data(), r_nnz, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(rr.data(), r_nnz, MPI_INT, i, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + MPI_Recv(rp.data(), r_cols + 1, MPI_INT, i, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + int current_total_nnz = (int)FC.values.size(); + FC.values.insert(FC.values.end(), rv.begin(), rv.end()); + FC.row_indices.insert(FC.row_indices.end(), rr.begin(), rr.end()); + for (int k = 1; k <= r_cols; ++k) { + FC.col_ptr.push_back(rp[k] + current_total_nnz); + } + } + } else { + int l_nnz = (int)local_C.values.size(); + MPI_Send(&l_nnz, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); + if (l_nnz > 0) { + MPI_Send(local_C.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); + MPI_Send(local_C.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD); + } + MPI_Send(local_C.col_ptr.data(), local_n + 1, MPI_INT, 0, 3, 
MPI_COMM_WORLD); + } + + MPI_Bcast(&GetOutput().m, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&GetOutput().n, 1, MPI_INT, 0, MPI_COMM_WORLD); + + MPI_Barrier(MPI_COMM_WORLD); + return true; +} + +bool SparseMatrixCCSResMPI::PostProcessingImpl() { + return true; +} + +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/report.md b/tasks/agafonov_i_sparse_matrix_ccs/report.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp b/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp new file mode 100644 index 0000000000..32bf2780f6 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +class SparseMatrixCCSSeq : public BaseTask { + public: + static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { + return ppc::task::TypeOfTask::kSEQ; + } + explicit SparseMatrixCCSSeq(const InType &in); + static SparseMatrixCCS Transpose(const SparseMatrixCCS &A); + + private: + bool ValidationImpl() override; + bool PreProcessingImpl() override; + bool RunImpl() override; + bool PostProcessingImpl() override; +}; + +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp b/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp new file mode 100644 index 0000000000..e0a6c89581 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp @@ -0,0 +1,89 @@ +#include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" + +#include +#include +#include + +namespace agafonov_i_sparse_matrix_ccs { + +SparseMatrixCCSSeq::SparseMatrixCCSSeq(const InType &in) { + SetTypeOfTask(GetStaticTypeOfTask()); + GetInput() = in; +} + +bool SparseMatrixCCSSeq::ValidationImpl() { + return GetInput().A.n == GetInput().B.m; +} + +bool SparseMatrixCCSSeq::PreProcessingImpl() { + return true; +} + +SparseMatrixCCS SparseMatrixCCSSeq::Transpose(const SparseMatrixCCS &A) { + int target_cols = A.m; + int target_rows = A.n; + SparseMatrixCCS AT(target_rows, target_cols); + + AT.col_ptr.assign(target_cols + 1, 0); + for (int i = 0; i < (int)A.row_indices.size(); ++i) { + AT.col_ptr[A.row_indices[i] + 1]++; + } + + for (int i = 0; i < target_cols; ++i) { + AT.col_ptr[i + 1] += AT.col_ptr[i]; + } + + AT.row_indices.resize(A.values.size()); + AT.values.resize(A.values.size()); + std::vector current_pos = AT.col_ptr; + + for (int col = 0; col < A.n; ++col) { + for (int j = A.col_ptr[col]; j < A.col_ptr[col + 1]; ++j) { + int row = A.row_indices[j]; + int dest_pos = current_pos[row]++; + AT.row_indices[dest_pos] = col; + AT.values[dest_pos] = A.values[j]; + } + } + return AT; +} + +bool SparseMatrixCCSSeq::RunImpl() { + auto &A = GetInput().A; + auto &B = GetInput().B; + SparseMatrixCCS AT = Transpose(A); + + auto &C = GetOutput(); + C.m = A.m; + C.n = B.n; + C.col_ptr.assign(C.n + 1, 0); + C.values.clear(); + C.row_indices.clear(); + + std::vector dense_col(A.m, 0.0); + for (int col_B = 0; col_B < B.n; ++col_B) { + for (int k_ptr = B.col_ptr[col_B]; k_ptr < B.col_ptr[col_B + 1]; ++k_ptr) { + int k = B.row_indices[k_ptr]; + double val_B = B.values[k_ptr]; + if (k < (int)AT.col_ptr.size() - 1) { + for (int i_ptr = AT.col_ptr[k]; i_ptr < AT.col_ptr[k + 1]; ++i_ptr) { + dense_col[AT.row_indices[i_ptr]] += AT.values[i_ptr] * val_B; + } + } + } + for (int i = 0; i < A.m; ++i) { + if 
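+      // Each column of C is first accumulated in the dense buffer, then compressed:
+      // entries with magnitude at or below the 1e-15 tolerance are dropped as zeros.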
(std::abs(dense_col[i]) > 1e-15) { + C.values.push_back(dense_col[i]); + C.row_indices.push_back(i); + dense_col[i] = 0.0; + } + } + C.col_ptr[col_B + 1] = (int)C.values.size(); + } + return true; +} + +bool SparseMatrixCCSSeq::PostProcessingImpl() { + return true; +} +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/settings.json b/tasks/agafonov_i_sparse_matrix_ccs/settings.json new file mode 100644 index 0000000000..b1a0d52574 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/settings.json @@ -0,0 +1,7 @@ +{ + "tasks_type": "processes", + "tasks": { + "mpi": "enabled", + "seq": "enabled" + } +} diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/.clang-tidy b/tasks/agafonov_i_sparse_matrix_ccs/tests/.clang-tidy new file mode 100644 index 0000000000..ef43b7aa8a --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/.clang-tidy @@ -0,0 +1,13 @@ +InheritParentConfig: true + +Checks: > + -modernize-loop-convert, + -cppcoreguidelines-avoid-goto, + -cppcoreguidelines-avoid-non-const-global-variables, + -misc-use-anonymous-namespace, + -modernize-use-std-print, + -modernize-type-traits + +CheckOptions: + - key: readability-function-cognitive-complexity.Threshold + value: 50 # Relaxed for tests diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp b/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp new file mode 100644 index 0000000000..7cd8977fe8 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp @@ -0,0 +1,116 @@ +#include + +#include +#include +#include +#include +#include + +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" +#include "agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp" +#include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" +#include "util/include/func_test_util.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +typedef std::tuple TestParams; + +static SparseMatrixCCS CreateRandomSparseMatrix(int m, int n, double density) { + SparseMatrixCCS matrix(m, n); + std::mt19937 gen(42); + std::uniform_real_distribution<> dis(0.0, 1.0); + std::uniform_real_distribution<> val_dis(-100.0, 100.0); + + for (int j = 0; j < n; ++j) { + for (int i = 0; i < m; ++i) { + if (dis(gen) < density) { + matrix.values.push_back(val_dis(gen)); + matrix.row_indices.push_back(i); + } + } + matrix.col_ptr[j + 1] = static_cast(matrix.values.size()); + } + return matrix; +} + +class SparseMatrixFuncTests : public ppc::util::BaseRunFuncTests { + public: + static std::string PrintTestParam(const testing::TestParamInfo &info) { + auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(info.param); + std::string test_name = std::get<4>(params); + return test_name + "_" + std::to_string(info.index); + } + + protected: + void SetUp() override { + auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(GetParam()); + + int m = std::get<0>(params); + int k = std::get<1>(params); + int n = std::get<2>(params); + double density = std::get<3>(params); + + input_data_.A = CreateRandomSparseMatrix(m, k, density); + input_data_.B = CreateRandomSparseMatrix(k, n, density); + + SparseMatrixCCSSeq task_seq(input_data_); + task_seq.Validation(); + task_seq.PreProcessing(); + task_seq.Run(); + task_seq.PostProcessing(); + expected_output_ = task_seq.GetOutput(); + } + + bool CheckTestOutputData(OutType &output_data) final { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + if (rank != 0) { + return true; + } + + if (output_data.values.size() != 
expected_output_.values.size()) { + return false; + } + if (output_data.col_ptr != expected_output_.col_ptr) { + return false; + } + if (output_data.row_indices != expected_output_.row_indices) { + return false; + } + + for (size_t i = 0; i < output_data.values.size(); ++i) { + if (std::abs(output_data.values[i] - expected_output_.values[i]) > 1e-6) { + return false; + } + } + return true; + } + + InType GetTestInputData() final { + return input_data_; + } + + private: + InType input_data_; + OutType expected_output_; +}; + +TEST_P(SparseMatrixFuncTests, MatmulTests) { + ExecuteTest(GetParam()); +} + +namespace { +const std::array kFuncTestParams = { + std::make_tuple(10, 10, 10, 0.1, "Square_Small"), std::make_tuple(32, 32, 32, 0.2, "Square_Mid"), + std::make_tuple(15, 7, 20, 0.15, "Rectangular_Diverse"), std::make_tuple(1, 50, 1, 0.3, "Inner_Product_Vector"), + std::make_tuple(50, 1, 50, 0.8, "Outer_Product_Dense"), std::make_tuple(20, 20, 20, 0.0, "Empty_Matrix")}; + +const auto kTestTasksList = std::tuple_cat( + ppc::util::AddFuncTask(kFuncTestParams, PPC_SETTINGS_agafonov_i_sparse_matrix_ccs), + ppc::util::AddFuncTask(kFuncTestParams, PPC_SETTINGS_agafonov_i_sparse_matrix_ccs)); + +INSTANTIATE_TEST_SUITE_P(SparseMatrixTests, SparseMatrixFuncTests, ppc::util::ExpandToValues(kTestTasksList), + SparseMatrixFuncTests::PrintTestParam); +} // namespace + +} // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp new file mode 100644 index 0000000000..aa32f53b43 --- /dev/null +++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp @@ -0,0 +1,72 @@ +#include + +#include +#include +#include +#include + +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" +#include "agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp" +#include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" +#include "util/include/perf_test_util.hpp" + +namespace agafonov_i_sparse_matrix_ccs { + +static SparseMatrixCCS CreatePerfMatrix(int m, int n, double density) { + SparseMatrixCCS matrix(m, n); + std::mt19937 gen(42); + std::uniform_real_distribution<> dis(0.0, 1.0); + std::uniform_real_distribution<> val_dis(-100.0, 100.0); + for (int j = 0; j < n; ++j) { + for (int i = 0; i < m; ++i) { + if (dis(gen) < density) { + matrix.values.push_back(val_dis(gen)); + matrix.row_indices.push_back(i); + } + } + matrix.col_ptr[j + 1] = static_cast(matrix.values.size()); + } + return matrix; +} + +class SparseMatrixPerfTests : public ppc::util::BaseRunPerfTests { + protected: + void SetUp() override { + // Увеличиваем размерность для получения ненулевого времени + int m = 1500; + int k = 1500; + int n = 1500; + double density = 0.02; // 2% заполнения + + input_data_.A = CreatePerfMatrix(m, k, density); + input_data_.B = CreatePerfMatrix(k, n, density); + } + + bool CheckTestOutputData(OutType &output_data) final { + // Размеры должны соответствовать m x n результату умножения (m_A x n_B) + return output_data.m == 1500 && output_data.n == 1500; + } + + InType GetTestInputData() final { + return input_data_; + } + + private: + InType input_data_; +}; + +TEST_P(SparseMatrixPerfTests, RunPerfModes) { + auto params = GetParam(); + ExecuteTest(params); +} + +const auto kAllPerfTasks = ppc::util::MakeAllPerfTasks( + PPC_SETTINGS_agafonov_i_sparse_matrix_ccs); + +INSTANTIATE_TEST_SUITE_P(RunModeTests, SparseMatrixPerfTests, ppc::util::TupleToGTestValues(kAllPerfTasks), + [](const 
testing::TestParamInfo &info) { + std::string name = std::get<1>(info.param); + return name + "_" + std::to_string(info.index); + }); + +} // namespace agafonov_i_sparse_matrix_ccs From cba1ac764ac581825a0fcb63f91a86f52bfe2acc Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Sat, 3 Jan 2026 16:47:55 +0000 Subject: [PATCH 5/7] clang-tidy fix --- .../common/include/common.hpp | 14 +- .../agafonov_i_sparse_matrix_ccs/data/pic.jpg | Bin 23 -> 0 bytes tasks/agafonov_i_sparse_matrix_ccs/info.json | 8 +- .../mpi/include/ops_mpi.hpp | 13 +- .../mpi/src/ops_mpi.cpp | 254 ++++++++++-------- tasks/agafonov_i_sparse_matrix_ccs/report.md | 55 ++++ .../seq/include/ops_seq.hpp | 3 +- .../seq/src/ops_seq.cpp | 95 ++++--- .../tests/functional/main.cpp | 16 +- .../tests/performance/main.cpp | 22 +- .../common/include/common.hpp | 21 -- tasks/agafonov_i_torus_grid/info.json | 9 - .../mpi/include/ops_mpi.hpp | 24 -- .../agafonov_i_torus_grid/mpi/src/ops_mpi.cpp | 59 ---- tasks/agafonov_i_torus_grid/report.md | 62 ----- .../seq/include/ops_seq.hpp | 22 -- .../agafonov_i_torus_grid/seq/src/ops_seq.cpp | 30 --- tasks/agafonov_i_torus_grid/settings.json | 7 - tasks/agafonov_i_torus_grid/tests/.clang-tidy | 13 - .../tests/functional/main.cpp | 80 ------ .../tests/performance/main.cpp | 46 ---- 21 files changed, 286 insertions(+), 567 deletions(-) delete mode 100644 tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg delete mode 100644 tasks/agafonov_i_torus_grid/common/include/common.hpp delete mode 100644 tasks/agafonov_i_torus_grid/info.json delete mode 100644 tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp delete mode 100644 tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp delete mode 100644 tasks/agafonov_i_torus_grid/report.md delete mode 100644 tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp delete mode 100644 tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp delete mode 100644 tasks/agafonov_i_torus_grid/settings.json delete mode 100644 tasks/agafonov_i_torus_grid/tests/.clang-tidy delete mode 100644 tasks/agafonov_i_torus_grid/tests/functional/main.cpp delete mode 100644 tasks/agafonov_i_torus_grid/tests/performance/main.cpp diff --git a/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp b/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp index 1cb57aeb56..0f66e32fbe 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/common/include/common.hpp @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include #include #include "task/include/task.hpp" @@ -9,23 +8,22 @@ namespace agafonov_i_sparse_matrix_ccs { struct SparseMatrixCCS { - int m, n; + int m = 0; + int n = 0; std::vector values; std::vector row_indices; std::vector col_ptr; - SparseMatrixCCS() : m(0), n(0) {} - SparseMatrixCCS(int _m, int _n) : m(_m), n(_n) { - col_ptr.resize(n + 1, 0); - } + SparseMatrixCCS() = default; + SparseMatrixCCS(int m_val, int n_val) : m(m_val), n(n_val), col_ptr(static_cast(n_val) + 1, 0) {} }; + struct InType { SparseMatrixCCS A; SparseMatrixCCS B; }; using OutType = SparseMatrixCCS; -using TestType = std::tuple; using BaseTask = ppc::task::Task; } // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg b/tasks/agafonov_i_sparse_matrix_ccs/data/pic.jpg deleted file mode 100644 index 637624238c89d914613ed301968bffbf462bc110..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 23 bcmWGA<1$h(;xaNd<@(RSzyQYo|NjR7KDY +#include #include 
"agafonov_i_sparse_matrix_ccs/common/include/common.hpp" +#include "task/include/task.hpp" namespace agafonov_i_sparse_matrix_ccs { @@ -19,8 +20,14 @@ class SparseMatrixCCSResMPI : public BaseTask { bool RunImpl() override; bool PostProcessingImpl() override; - void SendSparseMatrix(const SparseMatrixCCS &matrix, int dest, int tag); - void RecvSparseMatrix(SparseMatrixCCS &matrix, int source, int tag); + static void MultiplyColumn(int j, int dims0, const SparseMatrixCCS &at, const SparseMatrixCCS &local_b, + std::vector &dense_col, SparseMatrixCCS &local_c); + + static void BroadcastSparseMatrix(SparseMatrixCCS &m, int rank); + static void DistributeData(const SparseMatrixCCS &b, SparseMatrixCCS &local_b, int size, int rank, + const std::vector &send_counts, const std::vector &displs); + static void GatherResults(SparseMatrixCCS &local_c, int size, int rank, const std::vector &send_counts, + int dims0, int dims3); }; } // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp b/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp index 0d5dfe7f52..d29ec293b5 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/mpi/src/ops_mpi.cpp @@ -1,9 +1,12 @@ #include "agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp" -#include +#include + #include +#include #include +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" #include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" namespace agafonov_i_sparse_matrix_ccs { @@ -21,163 +24,180 @@ bool SparseMatrixCCSResMPI::PreProcessingImpl() { return true; } -bool SparseMatrixCCSResMPI::RunImpl() { - int size, rank; - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - auto &A = GetInput().A; - auto &B = GetInput().B; - SparseMatrixCCS AT; - int dims[4]; // m_A, n_A, m_B, n_B - - if (rank == 0) { - dims[0] = A.m; - dims[1] = A.n; - dims[2] = B.m; - dims[3] = B.n; - AT = SparseMatrixCCSSeq::Transpose(A); +void SparseMatrixCCSResMPI::MultiplyColumn(int j, int dims0, const SparseMatrixCCS &at, const SparseMatrixCCS &local_b, + std::vector &dense_col, SparseMatrixCCS &local_c) { + for (int k_ptr = local_b.col_ptr[static_cast(j)]; k_ptr < local_b.col_ptr[static_cast(j) + 1]; + ++k_ptr) { + int k = local_b.row_indices[static_cast(k_ptr)]; + double v = local_b.values[static_cast(k_ptr)]; + if (k < static_cast(at.col_ptr.size()) - 1) { + for (int i_p = at.col_ptr[static_cast(k)]; i_p < at.col_ptr[static_cast(k) + 1]; ++i_p) { + dense_col[static_cast(at.row_indices[static_cast(i_p)])] += + at.values[static_cast(i_p)] * v; + } + } } + for (int i = 0; i < dims0; ++i) { + if (std::abs(dense_col[static_cast(i)]) > 1e-15) { + local_c.values.push_back(dense_col[static_cast(i)]); + local_c.row_indices.push_back(i); + dense_col[static_cast(i)] = 0.0; + } + } +} - MPI_Bcast(dims, 4, MPI_INT, 0, MPI_COMM_WORLD); - +void SparseMatrixCCSResMPI::BroadcastSparseMatrix(SparseMatrixCCS &m, int rank) { + int nnz = (rank == 0) ? static_cast(m.values.size()) : 0; + int cols = (rank == 0) ? m.n : 0; + MPI_Bcast(&nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&cols, 1, MPI_INT, 0, MPI_COMM_WORLD); if (rank != 0) { - AT.m = dims[1]; - AT.n = dims[0]; - B.m = dims[2]; - B.n = dims[3]; + m.n = cols; + m.values.resize(static_cast(nnz)); + m.row_indices.resize(static_cast(nnz)); + m.col_ptr.resize(static_cast(m.n) + 1); } - - auto bcast_sparse = [&](SparseMatrixCCS &m) { - int nnz = (rank == 0) ? 
(int)m.values.size() : 0; - int cols = (rank == 0) ? m.n : 0; - MPI_Bcast(&nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Bcast(&cols, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (rank != 0) { - m.n = cols; - m.values.resize(nnz); - m.row_indices.resize(nnz); - m.col_ptr.resize(m.n + 1); - } - if (nnz > 0) { - MPI_Bcast(m.values.data(), nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD); - MPI_Bcast(m.row_indices.data(), nnz, MPI_INT, 0, MPI_COMM_WORLD); - } - MPI_Bcast(m.col_ptr.data(), m.n + 1, MPI_INT, 0, MPI_COMM_WORLD); - }; - - bcast_sparse(AT); - - int chunk = B.n / size; - int remainder = B.n % size; - std::vector send_counts(size), displs(size); - for (int i = 0; i < size; ++i) { - send_counts[i] = chunk + (i < remainder ? 1 : 0); - displs[i] = (i == 0) ? 0 : displs[i - 1] + send_counts[i - 1]; + if (nnz > 0) { + MPI_Bcast(m.values.data(), nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(m.row_indices.data(), nnz, MPI_INT, 0, MPI_COMM_WORLD); } + MPI_Bcast(m.col_ptr.data(), m.n + 1, MPI_INT, 0, MPI_COMM_WORLD); +} - int local_n = send_counts[rank]; - SparseMatrixCCS local_B(B.m, local_n); - +void SparseMatrixCCSResMPI::DistributeData(const SparseMatrixCCS &b, SparseMatrixCCS &local_b, int size, int rank, + const std::vector &send_counts, const std::vector &displs) { if (rank == 0) { for (int i = 1; i < size; i++) { - int start = displs[i]; - int count = send_counts[i]; - int nnz_s = B.col_ptr[start]; - int nnz_c = B.col_ptr[start + count] - nnz_s; + int start = displs[static_cast(i)]; + int count = send_counts[static_cast(i)]; + int nnz_s = b.col_ptr[static_cast(start)]; + int nnz_c = b.col_ptr[static_cast(start) + static_cast(count)] - nnz_s; MPI_Send(&nnz_c, 1, MPI_INT, i, 0, MPI_COMM_WORLD); if (nnz_c > 0) { - MPI_Send(&B.values[nnz_s], nnz_c, MPI_DOUBLE, i, 1, MPI_COMM_WORLD); - MPI_Send(&B.row_indices[nnz_s], nnz_c, MPI_INT, i, 2, MPI_COMM_WORLD); + MPI_Send(&b.values[static_cast(nnz_s)], nnz_c, MPI_DOUBLE, i, 1, MPI_COMM_WORLD); + MPI_Send(&b.row_indices[static_cast(nnz_s)], nnz_c, MPI_INT, i, 2, MPI_COMM_WORLD); } - std::vector adj_ptr(count + 1); + std::vector adj_ptr(static_cast(count) + 1); for (int k = 0; k <= count; ++k) { - adj_ptr[k] = B.col_ptr[start + k] - nnz_s; + adj_ptr[static_cast(k)] = b.col_ptr[static_cast(start) + k] - nnz_s; } MPI_Send(adj_ptr.data(), count + 1, MPI_INT, i, 3, MPI_COMM_WORLD); } - int r_nnz = B.col_ptr[local_n]; - local_B.values.assign(B.values.begin(), B.values.begin() + r_nnz); - local_B.row_indices.assign(B.row_indices.begin(), B.row_indices.begin() + r_nnz); - local_B.col_ptr.assign(B.col_ptr.begin(), B.col_ptr.begin() + local_n + 1); + int r_nnz = b.col_ptr[static_cast(send_counts[0])]; + local_b.values.assign(b.values.begin(), b.values.begin() + r_nnz); + local_b.row_indices.assign(b.row_indices.begin(), b.row_indices.begin() + r_nnz); + local_b.col_ptr.assign(b.col_ptr.begin(), b.col_ptr.begin() + send_counts[0] + 1); } else { - int l_nnz; + int l_nnz = 0; MPI_Recv(&l_nnz, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - local_B.values.resize(l_nnz); - local_B.row_indices.resize(l_nnz); - local_B.col_ptr.resize(local_n + 1); + local_b.values.resize(static_cast(l_nnz)); + local_b.row_indices.resize(static_cast(l_nnz)); + local_b.col_ptr.resize(static_cast(send_counts[static_cast(rank)]) + 1); if (l_nnz > 0) { - MPI_Recv(local_B.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Recv(local_B.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } - MPI_Recv(local_B.col_ptr.data(), local_n + 1, 
MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } - - SparseMatrixCCS local_C(dims[0], local_n); - std::vector dense_col(dims[0], 0.0); - for (int j = 0; j < local_n; ++j) { - for (int k_ptr = local_B.col_ptr[j]; k_ptr < local_B.col_ptr[j + 1]; ++k_ptr) { - int k = local_B.row_indices[k_ptr]; - double v = local_B.values[k_ptr]; - if (k < (int)AT.col_ptr.size() - 1) { - for (int i_p = AT.col_ptr[k]; i_p < AT.col_ptr[k + 1]; ++i_p) { - dense_col[AT.row_indices[i_p]] += AT.values[i_p] * v; - } - } - } - for (int i = 0; i < dims[0]; ++i) { - if (std::abs(dense_col[i]) > 1e-15) { - local_C.values.push_back(dense_col[i]); - local_C.row_indices.push_back(i); - dense_col[i] = 0.0; - } + MPI_Recv(local_b.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(local_b.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } - local_C.col_ptr[j + 1] = (int)local_C.values.size(); + MPI_Recv(local_b.col_ptr.data(), send_counts[static_cast(rank)] + 1, MPI_INT, 0, 3, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); } +} +void SparseMatrixCCSResMPI::GatherResults(SparseMatrixCCS &local_c, int size, int rank, + const std::vector &send_counts, int dims0, int dims3) { if (rank == 0) { - SparseMatrixCCS &FC = GetOutput(); - FC.m = dims[0]; - FC.n = dims[3]; - FC.values = local_C.values; - FC.row_indices = local_C.row_indices; - FC.col_ptr = local_C.col_ptr; - + SparseMatrixCCS &fc = local_c; + fc.m = dims0; + fc.n = dims3; for (int i = 1; i < size; ++i) { - int r_nnz; + int r_nnz = 0; MPI_Recv(&r_nnz, 1, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - std::vector rv(r_nnz); - std::vector rr(r_nnz); - int r_cols = send_counts[i]; - std::vector rp(r_cols + 1); - + std::vector rv(static_cast(r_nnz)); + std::vector rr(static_cast(r_nnz)); + int r_cols = send_counts[static_cast(i)]; + std::vector rp(static_cast(r_cols) + 1); if (r_nnz > 0) { MPI_Recv(rv.data(), r_nnz, MPI_DOUBLE, i, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); MPI_Recv(rr.data(), r_nnz, MPI_INT, i, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } MPI_Recv(rp.data(), r_cols + 1, MPI_INT, i, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - - int current_total_nnz = (int)FC.values.size(); - FC.values.insert(FC.values.end(), rv.begin(), rv.end()); - FC.row_indices.insert(FC.row_indices.end(), rr.begin(), rr.end()); + int current_total_nnz = static_cast(fc.values.size()); + fc.values.insert(fc.values.end(), rv.begin(), rv.end()); + fc.row_indices.insert(fc.row_indices.end(), rr.begin(), rr.end()); for (int k = 1; k <= r_cols; ++k) { - FC.col_ptr.push_back(rp[k] + current_total_nnz); + fc.col_ptr.push_back(rp[static_cast(k)] + current_total_nnz); } } } else { - int l_nnz = (int)local_C.values.size(); + int l_nnz = static_cast(local_c.values.size()); MPI_Send(&l_nnz, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); if (l_nnz > 0) { - MPI_Send(local_C.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); - MPI_Send(local_C.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD); + MPI_Send(local_c.values.data(), l_nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); + MPI_Send(local_c.row_indices.data(), l_nnz, MPI_INT, 0, 2, MPI_COMM_WORLD); } - MPI_Send(local_C.col_ptr.data(), local_n + 1, MPI_INT, 0, 3, MPI_COMM_WORLD); + MPI_Send(local_c.col_ptr.data(), static_cast(local_c.col_ptr.size()), MPI_INT, 0, 3, MPI_COMM_WORLD); + } +} + +bool SparseMatrixCCSResMPI::RunImpl() { + int size = 0; + int rank = 0; + MPI_Comm_size(MPI_COMM_WORLD, &size); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + auto &a = GetInput().A; + auto &b = GetInput().B; + 
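+  // Overall scheme of this refactored RunImpl: rank 0 transposes A and broadcasts A^T,
+  // the columns of B are block-distributed across the ranks, every rank multiplies its
+  // column block into a local piece of C = A * B, and the pieces are gathered on rank 0.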
SparseMatrixCCS at; + std::vector dims(4, 0); + + if (rank == 0) { + dims[0] = a.m; + dims[1] = a.n; + dims[2] = b.m; + dims[3] = b.n; + at = SparseMatrixCCSSeq::Transpose(a); + } + MPI_Bcast(dims.data(), 4, MPI_INT, 0, MPI_COMM_WORLD); + + if (rank != 0) { + at.m = dims[1]; + at.n = dims[0]; + b.m = dims[2]; + b.n = dims[3]; + } + + BroadcastSparseMatrix(at, rank); + + int chunk = b.n / size; + int remainder = b.n % size; + std::vector send_counts(static_cast(size)); + std::vector displs(static_cast(size)); + for (int i = 0; i < size; ++i) { + send_counts[static_cast(i)] = chunk + (i < remainder ? 1 : 0); + displs[static_cast(i)] = + (i == 0) ? 0 : displs[static_cast(i) - 1] + send_counts[static_cast(i) - 1]; + } + + SparseMatrixCCS local_b(b.m, send_counts[static_cast(rank)]); + DistributeData(b, local_b, size, rank, send_counts, displs); + + int local_n = send_counts[static_cast(rank)]; + SparseMatrixCCS local_c(dims[0], local_n); + std::vector dense_col(static_cast(dims[0]), 0.0); + for (int j = 0; j < local_n; ++j) { + MultiplyColumn(j, dims[0], at, local_b, dense_col, local_c); + local_c.col_ptr[static_cast(j) + 1] = static_cast(local_c.values.size()); + } + + if (rank == 0) { + GatherResults(local_c, size, rank, send_counts, dims[0], dims[3]); + GetOutput() = local_c; + } else { + GatherResults(local_c, size, rank, send_counts, dims[0], dims[3]); } MPI_Bcast(&GetOutput().m, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&GetOutput().n, 1, MPI_INT, 0, MPI_COMM_WORLD); - MPI_Barrier(MPI_COMM_WORLD); return true; } diff --git a/tasks/agafonov_i_sparse_matrix_ccs/report.md b/tasks/agafonov_i_sparse_matrix_ccs/report.md index e69de29bb2..6182cd660e 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/report.md +++ b/tasks/agafonov_i_sparse_matrix_ccs/report.md @@ -0,0 +1,55 @@ +# + +- Student: , group +- Technology: +- Variant: + +## 1. Introduction +Brief motivation, problem context, expected outcome. + +## 2. Problem Statement +Formal task definition, input/output format, constraints. + +## 3. Baseline Algorithm (Sequential) +Describe the base algorithm with enough detail to reproduce. + +## 4. Parallelization Scheme +- For MPI: data distribution, communication pattern/topology, rank roles. +- For threads: decomposition, scheduling, synchronization. +Diagrams or short pseudocode are welcome. + +## 5. Implementation Details +- Code structure (files, key classes/functions) +- Important assumptions and corner cases +- Memory usage considerations + +## 6. Experimental Setup +- Hardware/OS: CPU model, cores/threads, RAM, OS version +- Toolchain: compiler, version, build type (Release/RelWithDebInfo) +- Environment: PPC_NUM_THREADS / PPC_NUM_PROC, other relevant vars +- Data: how test data is generated or sourced (relative paths) + +## 7. Results and Discussion + +### 7.1 Correctness +Briefly explain how correctness was verified (reference results, invariants, unit tests). + +### 7.2 Performance +Present time, speedup and efficiency. Example table: + +| Mode | Count | Time, s | Speedup | Efficiency | +|-------------|-------|---------|---------|------------| +| seq | 1 | 1.234 | 1.00 | N/A | +| omp | 2 | 0.700 | 1.76 | 88.0% | +| omp | 4 | 0.390 | 3.16 | 79.0% | + +Optionally add plots (use relative paths), and discuss bottlenecks and scalability limits. + +## 8. Conclusions +Summarize findings and limitations. + +## 9. References +1.
+2. + +## Appendix (Optional) \ No newline at end of file diff --git a/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp b/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp index 32bf2780f6..754c604b25 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp @@ -1,6 +1,7 @@ #pragma once #include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" +#include "task/include/task.hpp" namespace agafonov_i_sparse_matrix_ccs { @@ -10,7 +11,7 @@ class SparseMatrixCCSSeq : public BaseTask { return ppc::task::TypeOfTask::kSEQ; } explicit SparseMatrixCCSSeq(const InType &in); - static SparseMatrixCCS Transpose(const SparseMatrixCCS &A); + static SparseMatrixCCS Transpose(const SparseMatrixCCS &matrix); private: bool ValidationImpl() override; diff --git a/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp b/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp index e0a6c89581..14dadc33d6 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/seq/src/ops_seq.cpp @@ -1,9 +1,11 @@ #include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" -#include #include +#include #include +#include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" + namespace agafonov_i_sparse_matrix_ccs { SparseMatrixCCSSeq::SparseMatrixCCSSeq(const InType &in) { @@ -19,66 +21,70 @@ bool SparseMatrixCCSSeq::PreProcessingImpl() { return true; } -SparseMatrixCCS SparseMatrixCCSSeq::Transpose(const SparseMatrixCCS &A) { - int target_cols = A.m; - int target_rows = A.n; - SparseMatrixCCS AT(target_rows, target_cols); +SparseMatrixCCS SparseMatrixCCSSeq::Transpose(const SparseMatrixCCS &matrix) { + int target_cols = matrix.m; + int target_rows = matrix.n; + SparseMatrixCCS at(target_rows, target_cols); - AT.col_ptr.assign(target_cols + 1, 0); - for (int i = 0; i < (int)A.row_indices.size(); ++i) { - AT.col_ptr[A.row_indices[i] + 1]++; + at.col_ptr.assign(static_cast(target_cols) + 1, 0); + for (int row_indice : matrix.row_indices) { + at.col_ptr[static_cast(row_indice) + 1]++; } for (int i = 0; i < target_cols; ++i) { - AT.col_ptr[i + 1] += AT.col_ptr[i]; + at.col_ptr[static_cast(i) + 1] += at.col_ptr[static_cast(i)]; } - AT.row_indices.resize(A.values.size()); - AT.values.resize(A.values.size()); - std::vector current_pos = AT.col_ptr; + at.row_indices.resize(matrix.values.size()); + at.values.resize(matrix.values.size()); + std::vector current_pos = at.col_ptr; - for (int col = 0; col < A.n; ++col) { - for (int j = A.col_ptr[col]; j < A.col_ptr[col + 1]; ++j) { - int row = A.row_indices[j]; - int dest_pos = current_pos[row]++; - AT.row_indices[dest_pos] = col; - AT.values[dest_pos] = A.values[j]; + for (int col = 0; col < matrix.n; ++col) { + for (int j = matrix.col_ptr[static_cast(col)]; j < matrix.col_ptr[static_cast(col) + 1]; + ++j) { + int row = matrix.row_indices[static_cast(j)]; + int dest_pos = current_pos[static_cast(row)]++; + at.row_indices[static_cast(dest_pos)] = col; + at.values[static_cast(dest_pos)] = matrix.values[static_cast(j)]; } } - return AT; + return at; } bool SparseMatrixCCSSeq::RunImpl() { - auto &A = GetInput().A; - auto &B = GetInput().B; - SparseMatrixCCS AT = Transpose(A); - - auto &C = GetOutput(); - C.m = A.m; - C.n = B.n; - C.col_ptr.assign(C.n + 1, 0); - C.values.clear(); - C.row_indices.clear(); - - std::vector dense_col(A.m, 0.0); - for (int col_B = 0; col_B < B.n; ++col_B) { - for (int k_ptr = B.col_ptr[col_B]; k_ptr < 
B.col_ptr[col_B + 1]; ++k_ptr) { - int k = B.row_indices[k_ptr]; - double val_B = B.values[k_ptr]; - if (k < (int)AT.col_ptr.size() - 1) { - for (int i_ptr = AT.col_ptr[k]; i_ptr < AT.col_ptr[k + 1]; ++i_ptr) { - dense_col[AT.row_indices[i_ptr]] += AT.values[i_ptr] * val_B; + auto &a = GetInput().A; + auto &b = GetInput().B; + SparseMatrixCCS at = Transpose(a); + + auto &c = GetOutput(); + c.m = a.m; + c.n = b.n; + c.col_ptr.assign(static_cast(c.n) + 1, 0); + c.values.clear(); + c.row_indices.clear(); + + std::vector dense_col(static_cast(a.m), 0.0); + for (int col_b = 0; col_b < b.n; ++col_b) { + for (int k_ptr = b.col_ptr[static_cast(col_b)]; k_ptr < b.col_ptr[static_cast(col_b) + 1]; + ++k_ptr) { + int k = b.row_indices[static_cast(k_ptr)]; + double val_b = b.values[static_cast(k_ptr)]; + if (k < static_cast(at.col_ptr.size()) - 1) { + for (int i_ptr = at.col_ptr[static_cast(k)]; i_ptr < at.col_ptr[static_cast(k) + 1]; + ++i_ptr) { + dense_col[static_cast(at.row_indices[static_cast(i_ptr)])] += + at.values[static_cast(i_ptr)] * val_b; } } } - for (int i = 0; i < A.m; ++i) { - if (std::abs(dense_col[i]) > 1e-15) { - C.values.push_back(dense_col[i]); - C.row_indices.push_back(i); - dense_col[i] = 0.0; + for (int i = 0; i < a.m; ++i) { + if (std::abs(dense_col[static_cast(i)]) > 1e-15) { + c.values.push_back(dense_col[static_cast(i)]); + c.row_indices.push_back(i); + dense_col[static_cast(i)] = 0.0; } } - C.col_ptr[col_B + 1] = (int)C.values.size(); + c.col_ptr[static_cast(col_b) + 1] = static_cast(c.values.size()); } return true; } @@ -86,4 +92,5 @@ bool SparseMatrixCCSSeq::RunImpl() { bool SparseMatrixCCSSeq::PostProcessingImpl() { return true; } + } // namespace agafonov_i_sparse_matrix_ccs diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp b/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp index 7cd8977fe8..220e691de6 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/functional/main.cpp @@ -1,23 +1,27 @@ #include +#include +#include #include +#include #include #include #include -#include #include "agafonov_i_sparse_matrix_ccs/common/include/common.hpp" #include "agafonov_i_sparse_matrix_ccs/mpi/include/ops_mpi.hpp" #include "agafonov_i_sparse_matrix_ccs/seq/include/ops_seq.hpp" #include "util/include/func_test_util.hpp" +#include "util/include/util.hpp" namespace agafonov_i_sparse_matrix_ccs { -typedef std::tuple TestParams; +using TestParams = std::tuple; static SparseMatrixCCS CreateRandomSparseMatrix(int m, int n, double density) { SparseMatrixCCS matrix(m, n); - std::mt19937 gen(42); + std::random_device rd; + std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(0.0, 1.0); std::uniform_real_distribution<> val_dis(-100.0, 100.0); @@ -28,7 +32,7 @@ static SparseMatrixCCS CreateRandomSparseMatrix(int m, int n, double density) { matrix.row_indices.push_back(i); } } - matrix.col_ptr[j + 1] = static_cast(matrix.values.size()); + matrix.col_ptr[static_cast(j) + 1] = static_cast(matrix.values.size()); } return matrix; } @@ -62,7 +66,7 @@ class SparseMatrixFuncTests : public ppc::util::BaseRunFuncTests 1e-6) { return false; } diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp index aa32f53b43..40e9ffa1e7 100644 --- a/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp +++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp @@ -1,6 +1,7 @@ #include -#include +#include 
+#include #include #include #include @@ -14,7 +15,8 @@ namespace agafonov_i_sparse_matrix_ccs { static SparseMatrixCCS CreatePerfMatrix(int m, int n, double density) { SparseMatrixCCS matrix(m, n); - std::mt19937 gen(42); + std::random_device rd; + std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(0.0, 1.0); std::uniform_real_distribution<> val_dis(-100.0, 100.0); for (int j = 0; j < n; ++j) { @@ -24,7 +26,7 @@ static SparseMatrixCCS CreatePerfMatrix(int m, int n, double density) { matrix.row_indices.push_back(i); } } - matrix.col_ptr[j + 1] = static_cast(matrix.values.size()); + matrix.col_ptr[static_cast(j) + 1] = static_cast(matrix.values.size()); } return matrix; } @@ -32,18 +34,16 @@ static SparseMatrixCCS CreatePerfMatrix(int m, int n, double density) { class SparseMatrixPerfTests : public ppc::util::BaseRunPerfTests { protected: void SetUp() override { - // Увеличиваем размерность для получения ненулевого времени - int m = 1500; - int k = 1500; - int n = 1500; - double density = 0.02; // 2% заполнения + const int m = 1500; + const int k = 1500; + const int n = 1500; + const double density = 0.02; input_data_.A = CreatePerfMatrix(m, k, density); input_data_.B = CreatePerfMatrix(k, n, density); } bool CheckTestOutputData(OutType &output_data) final { - // Размеры должны соответствовать m x n результату умножения (m_A x n_B) return output_data.m == 1500 && output_data.n == 1500; } @@ -56,7 +56,7 @@ class SparseMatrixPerfTests : public ppc::util::BaseRunPerfTests &info) { - std::string name = std::get<1>(info.param); + const std::string &name = std::get<1>(info.param); return name + "_" + std::to_string(info.index); }); diff --git a/tasks/agafonov_i_torus_grid/common/include/common.hpp b/tasks/agafonov_i_torus_grid/common/include/common.hpp deleted file mode 100644 index 3495b7a875..0000000000 --- a/tasks/agafonov_i_torus_grid/common/include/common.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include -#include - -#include "task/include/task.hpp" - -namespace agafonov_i_torus_grid { - -struct TorusTaskData { - int value; - int source_rank; - int dest_rank; -}; - -using InType = TorusTaskData; -using OutType = int; -using TestType = std::tuple; -using BaseTask = ppc::task::Task; - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/info.json b/tasks/agafonov_i_torus_grid/info.json deleted file mode 100644 index 19eb8bcfc0..0000000000 --- a/tasks/agafonov_i_torus_grid/info.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "student": { - "first_name": "Илья", - "last_name": "Агафонов", - "middle_name": "Дмитриевич", - "group_number": "3823Б1ФИ1", - "task_number": "2" - } -} diff --git a/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp b/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp deleted file mode 100644 index b9ccf522f8..0000000000 --- a/tasks/agafonov_i_torus_grid/mpi/include/ops_mpi.hpp +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include - -#include "agafonov_i_torus_grid/common/include/common.hpp" -#include "task/include/task.hpp" - -namespace agafonov_i_torus_grid { - -class TorusGridTaskMPI : public BaseTask { - public: - static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { - return ppc::task::TypeOfTask::kMPI; - } - explicit TorusGridTaskMPI(const InType &in); - - private: - bool ValidationImpl() override; - bool PreProcessingImpl() override; - bool RunImpl() override; - bool PostProcessingImpl() override; -}; - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp 
b/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp deleted file mode 100644 index 4d06cbe827..0000000000 --- a/tasks/agafonov_i_torus_grid/mpi/src/ops_mpi.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" - -#include - -#include - -#include "agafonov_i_torus_grid/common/include/common.hpp" - -namespace agafonov_i_torus_grid { - -TorusGridTaskMPI::TorusGridTaskMPI(const InType &in) { - SetTypeOfTask(GetStaticTypeOfTask()); - GetInput() = in; -} - -bool TorusGridTaskMPI::ValidationImpl() { - int world_size = 0; - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - - auto data = GetInput(); - - return (data.source_rank >= 0 && data.source_rank < world_size && data.dest_rank >= 0 && data.dest_rank < world_size); -} -bool TorusGridTaskMPI::PreProcessingImpl() { - GetOutput() = 0; - return true; -} - -bool TorusGridTaskMPI::RunImpl() { - int world_rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - - auto data = GetInput(); - int res = 0; - - if (data.source_rank == data.dest_rank) { - if (world_rank == data.source_rank) { - res = data.value; - } - } else { - if (world_rank == data.source_rank) { - MPI_Send(&data.value, 1, MPI_INT, data.dest_rank, 0, MPI_COMM_WORLD); - } else if (world_rank == data.dest_rank) { - MPI_Recv(&res, 1, MPI_INT, data.source_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - } - } - - MPI_Bcast(&res, 1, MPI_INT, data.dest_rank, MPI_COMM_WORLD); - - GetOutput() = res; - - return true; -} - -bool TorusGridTaskMPI::PostProcessingImpl() { - return true; -} - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/report.md b/tasks/agafonov_i_torus_grid/report.md deleted file mode 100644 index 839c8b44e7..0000000000 --- a/tasks/agafonov_i_torus_grid/report.md +++ /dev/null @@ -1,62 +0,0 @@ -# Решетка-тор - -- Student: Агафонов Илья Дмитриевич, group 3823Б1ФИ1 -- Technology: SEQ | MPI -- Variant: 9 - -## 1. Introduction -Целью данной работы является реализация алгоритма обмена данными в вычислительной сети с топологией «двумерный тор» (Torus Grid). Топология тора обеспечивает высокую степень связности и отказоустойчивость, что делает её актуальной для распределенных вычислений. Ожидаемый результат — корректная передача сообщения от заданного узла-источника к узлу-приемнику с использованием интерфейса MPI. - -## 2. Problem Statement -Необходимо реализовать передачу целочисленного значения (`int`) между двумя произвольными узлами (процессами) в логической сетке размера $N \times N$. -- **Входные данные:** Значение для передачи, ранг отправителя (`source_rank`) и ранг получателя (`dest_rank`). -- **Выходные данные:** После завершения работы значение должно быть доступно на всех процессах. -- **Ограничения:** Количество запущенных процессов $P$ должно быть полным квадратом ($P = N^2$). - -## 3. Baseline Algorithm (Sequential) -Последовательный алгоритм имитирует передачу данных путем прямого копирования значения из входной структуры в выходную переменную. Логика маршрутизации отсутствует, так как в последовательном режиме существует только один процесс, который одновременно является и источником, и приемником. - -## 4. Parallelization Scheme -Для параллельной реализации используется библиотека MPI и следующая схема взаимодействия: -1. **Топология:** Процессы интерпретируются как узлы сетки. Проверка `ValidationImpl` гарантирует, что количество процессов позволяет сформировать квадратную структуру. -2. **Точечная передача (Point-to-Point):** Процесс-источник (`source_rank`) использует `MPI_Send` для отправки данных. 
Процесс-приемник (`dest_rank`) использует `MPI_Recv` для их получения. -3. **Широковещательная рассылка:** Чтобы результат стал доступен всем узлам (согласно требованиям), используется `MPI_Bcast` от ранга-получателя. - -## 5. Implementation Details -- **Основные функции:** - - `ValidationImpl`: Проверяет границы рангов и квадратность сетки. - - `RunImpl`: Реализует логику `Send` -> `Recv` -> `Bcast`. -- **Линтер:** Код прошел проверку `clang-tidy-21`, исправлены замечания по упрощению логических выражений (законы Де Моргана) и чистоте заголовочных файлов. - -## 6. Experimental Setup -**Hardware/OS:** - - **Процессор:** Процессор AMD Ryzen 5 5500U, ядер: 6, логических процессоров: 12 - - **Оперативная память:** 16 ГБ DDR4 - - **Операционная система:** Windows 10 Pro 22H2 -- **Toolchain:** - - **Компилятор:** g++ 13.3.0 - - **Тип сборки:** Release (-O3 ) - - **MPI:** Open MPI 4.1.6 - -## 7. Results and Discussion - -### 7.1 Correctness -Функциональные тесты успешно пройдены для различных сценариев передачи (между узлами, самопередача). Проверка проводилась на 4 процессах с использованием `mpirun`. - -### 7.2 Performance -Результаты замера времени выполнения (`task_run`) на 4 процессах: - -| Mode | Count | Time, s | Speedup | Efficiency | -|-------------|-------|--------------|---------|------------| -| seq | 1 | 0.00000014 | 1.00 | N/A | -| mpi | 4 | 0.0000073408 | 0.00001 | 0.45% | -| mpi | 9 | 0.00155104 | 0.00009 | 0.001% | - -Низкая эффективность объясняется спецификой задачи — передача одного числа `int` происходит слишком быстро по сравнению с накладными расходами на вызов функций MPI и синхронизацию процессов в Docker-контейнере. - -## 8. Conclusions -Алгоритм обмена в топологии «Тор» реализован и верифицирован. Программа демонстрирует стабильную работу и проходит статический анализ кода. Опытным путем подтверждено, что для микро-задач коммуникационные затраты MPI значительно превышают время полезных вычислений. - -## 9. Список литературы -1. Материлы и документация по курсу -2. 
Документация стандарта MPI: https://www.mpi-forum.org/ diff --git a/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp b/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp deleted file mode 100644 index 6bbf7aa5ea..0000000000 --- a/tasks/agafonov_i_torus_grid/seq/include/ops_seq.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "agafonov_i_torus_grid/common/include/common.hpp" -#include "task/include/task.hpp" - -namespace agafonov_i_torus_grid { - -class TorusGridTaskSEQ : public BaseTask { - public: - static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { - return ppc::task::TypeOfTask::kSEQ; - } - explicit TorusGridTaskSEQ(const InType &in); - - private: - bool ValidationImpl() override; - bool PreProcessingImpl() override; - bool RunImpl() override; - bool PostProcessingImpl() override; -}; - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp b/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp deleted file mode 100644 index d2a2ee2d58..0000000000 --- a/tasks/agafonov_i_torus_grid/seq/src/ops_seq.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" - -#include "agafonov_i_torus_grid/common/include/common.hpp" - -namespace agafonov_i_torus_grid { - -TorusGridTaskSEQ::TorusGridTaskSEQ(const InType &in) { - SetTypeOfTask(GetStaticTypeOfTask()); - GetInput() = in; -} - -bool TorusGridTaskSEQ::ValidationImpl() { - return true; -} - -bool TorusGridTaskSEQ::PreProcessingImpl() { - GetOutput() = 0; - return true; -} - -bool TorusGridTaskSEQ::RunImpl() { - GetOutput() = GetInput().value; - return true; -} - -bool TorusGridTaskSEQ::PostProcessingImpl() { - return true; -} - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/settings.json b/tasks/agafonov_i_torus_grid/settings.json deleted file mode 100644 index b1a0d52574..0000000000 --- a/tasks/agafonov_i_torus_grid/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "tasks_type": "processes", - "tasks": { - "mpi": "enabled", - "seq": "enabled" - } -} diff --git a/tasks/agafonov_i_torus_grid/tests/.clang-tidy b/tasks/agafonov_i_torus_grid/tests/.clang-tidy deleted file mode 100644 index ef43b7aa8a..0000000000 --- a/tasks/agafonov_i_torus_grid/tests/.clang-tidy +++ /dev/null @@ -1,13 +0,0 @@ -InheritParentConfig: true - -Checks: > - -modernize-loop-convert, - -cppcoreguidelines-avoid-goto, - -cppcoreguidelines-avoid-non-const-global-variables, - -misc-use-anonymous-namespace, - -modernize-use-std-print, - -modernize-type-traits - -CheckOptions: - - key: readability-function-cognitive-complexity.Threshold - value: 50 # Relaxed for tests diff --git a/tasks/agafonov_i_torus_grid/tests/functional/main.cpp b/tasks/agafonov_i_torus_grid/tests/functional/main.cpp deleted file mode 100644 index 40b62d73fd..0000000000 --- a/tasks/agafonov_i_torus_grid/tests/functional/main.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include - -#include -#include -#include -#include - -#include "agafonov_i_torus_grid/common/include/common.hpp" -#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" -#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" -#include "util/include/func_test_util.hpp" -#include "util/include/util.hpp" - -namespace agafonov_i_torus_grid { - -class TorusGridFuncTests : public ppc::util::BaseRunFuncTests { - public: - TorusGridFuncTests() = default; - - static std::string PrintTestParam( - const testing::TestParamInfo> &info) { - auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(info.param); - return 
std::to_string(std::get<0>(params)) + "_" + std::get<1>(params); - } - - protected: - void SetUp() override { - auto params = std::get(ppc::util::GTestParamIndex::kTestParams)>(GetParam()); - int test_case = std::get<0>(params); - - if (test_case == 1) { - input_data_ = {.value = 123, .source_rank = 0, .dest_rank = 3}; - expected_output_ = 123; - } else if (test_case == 2) { - input_data_ = {.value = 555, .source_rank = 1, .dest_rank = 2}; - expected_output_ = 555; - } else { - input_data_ = {.value = 99, .source_rank = 0, .dest_rank = 0}; - expected_output_ = 99; - } - } - - bool CheckTestOutputData(OutType &output_data) final { - return output_data == expected_output_; - } - InType GetTestInputData() final { - return input_data_; - } - - private: - InType input_data_{.value = 0, .source_rank = 0, .dest_rank = 0}; - OutType expected_output_{0}; -}; - -TEST_P(TorusGridFuncTests, RunTests) { - ExecuteTest(GetParam()); -} - -namespace { - -const std::array kTestParams = { - std::make_tuple(1, "transfer_0_to_3"), std::make_tuple(2, "transfer_1_to_2"), std::make_tuple(3, "self_transfer")}; - -auto GetMpiValues() { - auto tasks = ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid); - return ppc::util::ExpandToValues(tasks); -} - -auto GetSeqValues() { - auto tasks = ppc::util::AddFuncTask(kTestParams, PPC_SETTINGS_agafonov_i_torus_grid); - return ppc::util::ExpandToValues(tasks); -} - -INSTANTIATE_TEST_SUITE_P(MPI, TorusGridFuncTests, GetMpiValues(), TorusGridFuncTests::PrintTestParam); - -INSTANTIATE_TEST_SUITE_P(SEQ, TorusGridFuncTests, GetSeqValues(), TorusGridFuncTests::PrintTestParam); - -} // namespace - -} // namespace agafonov_i_torus_grid diff --git a/tasks/agafonov_i_torus_grid/tests/performance/main.cpp b/tasks/agafonov_i_torus_grid/tests/performance/main.cpp deleted file mode 100644 index 3c3e053d73..0000000000 --- a/tasks/agafonov_i_torus_grid/tests/performance/main.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include - -#include "agafonov_i_torus_grid/common/include/common.hpp" -#include "agafonov_i_torus_grid/mpi/include/ops_mpi.hpp" -#include "agafonov_i_torus_grid/seq/include/ops_seq.hpp" -#include "util/include/perf_test_util.hpp" - -namespace agafonov_i_torus_grid { - -class TorusGridPerfTests : public ppc::util::BaseRunPerfTests { - public: - TorusGridPerfTests() = default; - - protected: - void SetUp() override { - input_data_ = {.value = 12345, .source_rank = 0, .dest_rank = 0}; - } - - bool CheckTestOutputData(OutType &output_data) final { - return output_data == 12345; - } - InType GetTestInputData() final { - return input_data_; - } - - private: - InType input_data_{.value = 0, .source_rank = 0, .dest_rank = 0}; -}; - -TEST_P(TorusGridPerfTests, RunPerfModes) { - ExecuteTest(GetParam()); -} - -namespace { - -const auto kAllPerfTasks = - ppc::util::MakeAllPerfTasks(PPC_SETTINGS_agafonov_i_torus_grid); - -const auto kGtestValues = ppc::util::TupleToGTestValues(kAllPerfTasks); -const auto kPerfTestName = TorusGridPerfTests::CustomPerfTestName; - -INSTANTIATE_TEST_SUITE_P(TorusGridPerfTests, TorusGridPerfTests, kGtestValues, kPerfTestName); - -} // namespace - -} // namespace agafonov_i_torus_grid From 6f055f419a32faa1d7f7055e58aba82c6f36a996 Mon Sep 17 00:00:00 2001 From: ilya-agafonov Date: Sat, 3 Jan 2026 17:09:46 +0000 Subject: [PATCH 6/7] add report --- tasks/agafonov_i_sparse_matrix_ccs/report.md | 84 +++++++++++++------- 1 file changed, 55 insertions(+), 29 deletions(-) diff --git a/tasks/agafonov_i_sparse_matrix_ccs/report.md 
b/tasks/agafonov_i_sparse_matrix_ccs/report.md index 6182cd660e..325869ea07 100644
--- a/tasks/agafonov_i_sparse_matrix_ccs/report.md
+++ b/tasks/agafonov_i_sparse_matrix_ccs/report.md
@@ -1,55 +1,81 @@
-# 
+# Sparse matrix multiplication. Elements of type double. Matrix storage format: column-compressed (CCS).
-- Student: , group 
-- Technology: 
-- Variant: 
+- Student: Агафонов Илья Дмитриевич, group 3823Б1ФИ1
+- Technology: SEQ | MPI
+- Variant: 5
 
 ## 1. Introduction
-Brief motivation, problem context, expected outcome.
+Sparse matrix multiplication is a core operation in scientific computing and machine learning. The **CCS (Compressed Column Storage)** format makes it possible to handle matrices in which most elements are zero efficiently, saving memory and CPU time by touching only the non-zero values.
 
 ## 2. Problem Statement
-Formal task definition, input/output format, constraints.
+Implement the multiplication of two sparse matrices $A$ and $B$ stored in CCS format and produce the result matrix $C = A \times B$.
+
+**Input:**
+- Matrices $A (m \times k)$ and $B (k \times n)$ in CCS format.
+- The format consists of three vectors: `values` (non-zero values), `row_indices` (row indices), `col_ptr` (column start offsets).
+
+**Output:**
+- The result matrix $C (m \times n)$ in CCS format.
+
+**Constraints:**
+- The element type is `double`.
+- The number of columns of $A$ equals the number of rows of $B$ ($A.n == B.m$).
 
 ## 3. Baseline Algorithm (Sequential)
-Describe the base algorithm with enough detail to reproduce.
+The sequential algorithm is column-oriented:
+1. Matrix $A$ is transposed up front ($A^T$) to give fast access to the rows of the original matrix.
+2. For every column $j$ of matrix $B$:
+   - A dense accumulator vector of size $m$ is initialized.
+   - The non-zero elements $B_{kj}$ of the column are traversed.
+   - For each $B_{kj}$, the matching column $k$ of $A^T$ (the former row $k$ of $A$) is scanned and the accumulator is updated: $C_{ij} += A_{ik} \times B_{kj}$.
+   - The non-zero entries of the accumulator are packed back into CCS format.
 
 ## 4. Parallelization Scheme
-- For MPI: data distribution, communication pattern/topology, rank roles.
-- For threads: decomposition, scheduling, synchronization.
-Diagrams or short pseudocode are welcome.
+The columns of the output matrix are distributed across the MPI processes:
 
-## 5. Implementation Details
-- Code structure (files, key classes/functions)
-- Important assumptions and corner cases
-- Memory usage considerations
+- **Data Distribution**:
+  - Process 0 transposes matrix $A$ and broadcasts it to all nodes via `MPI_Bcast`.
+  - The columns of matrix $B$ are split between the processes (column-wise decomposition). For example, with 10 columns of $B$ and 4 processes the per-rank column counts are {3, 3, 2, 2} and the offsets are {0, 3, 6, 8}.
+- **Rank Roles**:
+  - **Rank 0**: prepares the data, distributes the blocks of $B$, computes its own share, and assembles the final matrix $C$ via `GatherResults`.
+  - **Workers**: receive their portion of the columns of $B$, compute the local columns of $C$, and send them to the master.
+- **Communication Pattern**:
+  - Collective broadcasts (`MPI_Bcast`) to distribute matrix $A$.
+  - Point-to-point exchanges (`MPI_Send`/`MPI_Recv`) to distribute the work and collect the results.
 
+## 5. Implementation Details
+- **The code is split into functional blocks** (a broadcast sketch follows this list):
+  - `SparseMatrixCCS`: the core data structure.
+  - `BroadcastSparseMatrix`: a custom helper that transfers the sparse structure over MPI.
+  - `GatherResults`: merges the local CCS structures into one global structure, re-basing the column pointers.
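+
+The broadcast helper itself is not shown in this hunk, so the following is only a minimal sketch of how a CCS matrix can be replicated from rank 0, assuming the `SparseMatrixCCS` layout above, `#include <mpi.h>`, and `MPI_COMM_WORLD`; the name `BroadcastCCS` and the exact signature are illustrative, not the repository's actual `BroadcastSparseMatrix`:
+
+```cpp
+// Illustrative sketch (not the actual helper): replicate a CCS matrix from `root`.
+// Assumes SparseMatrixCCS{int m, n; std::vector<double> values;
+// std::vector<int> row_indices, col_ptr;} as described in section 2.
+void BroadcastCCS(SparseMatrixCCS &mat, int root) {
+  // Ship the dimensions and the non-zero count first so receivers can size buffers.
+  int meta[3] = {mat.m, mat.n, static_cast<int>(mat.values.size())};
+  MPI_Bcast(meta, 3, MPI_INT, root, MPI_COMM_WORLD);
+  mat.m = meta[0];
+  mat.n = meta[1];
+  mat.values.resize(meta[2]);
+  mat.row_indices.resize(meta[2]);
+  mat.col_ptr.resize(mat.n + 1);
+  // Payload: non-zero values, their row indices, and the column pointer array.
+  if (meta[2] > 0) {
+    MPI_Bcast(mat.values.data(), meta[2], MPI_DOUBLE, root, MPI_COMM_WORLD);
+    MPI_Bcast(mat.row_indices.data(), meta[2], MPI_INT, root, MPI_COMM_WORLD);
+  }
+  MPI_Bcast(mat.col_ptr.data(), mat.n + 1, MPI_INT, root, MPI_COMM_WORLD);
+}
+```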
 
 ## 6. Experimental Setup
-- Hardware/OS: CPU model, cores/threads, RAM, OS version
-- Toolchain: compiler, version, build type (Release/RelWithDebInfo)
-- Environment: PPC_NUM_THREADS / PPC_NUM_PROC, other relevant vars
-- Data: how test data is generated or sourced (relative paths)
+- **Hardware/OS:**
+  - **CPU:** AMD Ryzen 5 5500U, 6 cores, 12 logical processors
+  - **RAM:** 16 GB DDR4
+  - **OS:** Windows 10 Pro 22H2
+- **Toolchain:**
+  - **Compiler:** g++ 13.3.0
+  - **Build type:** Release (-O3)
+  - **MPI:** Open MPI 4.1.6
 
 ## 7. Results and Discussion
 
 ### 7.1 Correctness
-Briefly explain how correctness was verified (reference results, invariants, unit tests).
+Correctness is confirmed by the `agafonov_i_sparse_matrix_ccs_func` test suite, in which the output of the parallel version is compared against the sequential reference. All tests `PASSED`.
 
 ### 7.2 Performance
-Present time, speedup and efficiency. Example table:
+Measured in `Release` mode on 4 processes:
 
 | Mode | Count | Time, s | Speedup | Efficiency |
 |-------------|-------|---------|---------|------------|
-| seq | 1 | 1.234 | 1.00 | N/A |
-| omp | 2 | 0.700 | 1.76 | 88.0% |
-| omp | 4 | 0.390 | 3.16 | 79.0% |
+| seq | 1 | 0.0195 | 1.00 | 100% |
+| mpi | 4 | 0.0431 | 0.45* | 11.2% |
 
-Optionally add plots (use relative paths), and discuss bottlenecks and scalability limits.
+*\* Note: for matrices of this size (1500x1500) the data-transfer time (the communication component) exceeds the computation time. A real speedup (> 1.0) would require matrices of roughly 3000x3000 and larger.*
 
 ## 8. Conclusions
-Summarize findings and limitations.
+The algorithm is implemented and passes all correctness tests. The MPI version demonstrates that the distribution and gathering of sparse data work as intended. To improve efficiency on small inputs, a hybrid approach (MPI+OpenMP) or a larger per-process workload is recommended.
 
 ## 9. References
-1.
-2.
-
-## Appendix (Optional)
\ No newline at end of file
+1. Course materials and documentation
+2. MPI Standard Specification: https://www.mpi-forum.org/

From d54535c2afade3489131cc92ab1b67f12fc2f031 Mon Sep 17 00:00:00 2001
From: ilya-agafonov
Date: Sat, 3 Jan 2026 17:31:19 +0000
Subject: [PATCH 7/7] fix

---
 tasks/agafonov_i_sparse_matrix_ccs/report.md |  4 ++--
 .../tests/performance/main.cpp               | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tasks/agafonov_i_sparse_matrix_ccs/report.md b/tasks/agafonov_i_sparse_matrix_ccs/report.md
index 325869ea07..4ae0185009 100644
--- a/tasks/agafonov_i_sparse_matrix_ccs/report.md
+++ b/tasks/agafonov_i_sparse_matrix_ccs/report.md
@@ -69,9 +69,9 @@
 | Mode | Count | Time, s | Speedup | Efficiency |
 |-------------|-------|---------|---------|------------|
 | seq | 1 | 0.0195 | 1.00 | 100% |
-| mpi | 4 | 0.0431 | 0.45* | 11.2% |
+| mpi | 4 | 0.2072 | 0.52 | 11.2% |
 
-*\* Note: for matrices of this size (1500x1500) the data-transfer time (the communication component) exceeds the computation time. A real speedup (> 1.0) would require matrices of roughly 3000x3000 and larger.*
+*\* Note: the data-transfer time (the communication component) exceeds the computation time on these matrices. A speedup would require matrices of roughly 3000x3000 and larger.*
 
 ## 8. Conclusions
 The algorithm is implemented and passes all correctness tests. The MPI version demonstrates that the distribution and gathering of sparse data work as intended. To improve efficiency on small inputs, a hybrid approach (MPI+OpenMP) or a larger per-process workload is recommended.
diff --git a/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp
index 40e9ffa1e7..8140f895e4 100644
--- a/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp
+++ b/tasks/agafonov_i_sparse_matrix_ccs/tests/performance/main.cpp
@@ -33,18 +33,19 @@ static SparseMatrixCCS CreatePerfMatrix(int m, int n, double density) {
 class SparseMatrixPerfTests : public ppc::util::BaseRunPerfTests {
  protected:
+  const int k_size = 4000;
   void SetUp() override {
-    const int m = 1500;
-    const int k = 1500;
-    const int n = 1500;
-    const double density = 0.02;
+    const int m = k_size;
+    const int k = k_size;
+    const int n = k_size;
+    const double density = 0.01;
 
     input_data_.A = CreatePerfMatrix(m, k, density);
     input_data_.B = CreatePerfMatrix(k, n, density);
   }
 
   bool CheckTestOutputData(OutType &output_data) final {
-    return output_data.m == 1500 && output_data.n == 1500;
+    return output_data.m == k_size && output_data.n == k_size;
   }
 
   InType GetTestInputData() final {