diff --git a/include/micm/jit/solver/jit_lu_decomposition.inl b/include/micm/jit/solver/jit_lu_decomposition.inl index 9c4c94bfd..bb061030a 100644 --- a/include/micm/jit/solver/jit_lu_decomposition.inl +++ b/include/micm/jit/solver/jit_lu_decomposition.inl @@ -89,6 +89,15 @@ namespace micm func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, A_val); func.EndLoop(loop); } + else { + auto loop = func.StartLoop("Uik_eq_zero_loop", 0, L); + llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0)); + llvm::Value *iUf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, uik_nkj->first)); + llvm::Value *U_ptr_index[1]; + U_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iUf); + func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, zero_val); + func.EndLoop(loop); + } for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj) { auto loop = func.StartLoop("Uik_seq_Lij_Ujk_loop", 0, L); @@ -137,6 +146,15 @@ namespace micm func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, A_val); func.EndLoop(loop); } + else { + auto loop = func.StartLoop("Lki_eq_zero_loop", 0, L); + llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0)); + llvm::Value *iLf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, lki_nkj->first)); + llvm::Value *L_ptr_index[1]; + L_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iLf); + func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, zero_val); + func.EndLoop(loop); + } for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj) { auto loop = func.StartLoop("Lki_seq_Lkj_Uji_loop", 0, L); diff --git a/include/micm/solver/lu_decomposition.hpp b/include/micm/solver/lu_decomposition.hpp index 4c4cb9ec8..0b478a4ec 100644 --- a/include/micm/solver/lu_decomposition.hpp +++ b/include/micm/solver/lu_decomposition.hpp @@ -43,6 +43,13 @@ namespace micm /// For the sparse matrix algorithm, the indices of non-zero terms are stored in /// several arrays during construction. These arrays are iterated through during /// calls to Decompose to do the actual decomposition. + /// Our LU Decomposition only assigns the values of the jacobian to the LU matrices + /// when the *jacobian* is nonzero. However, the sparsity pattern of the jacobian doesn't + /// necessarily match that of the LU matrices. There can be more nonzero elements in the LU matrices + /// than in the jacobian. When this happens, we still need to assign the value of the jacobian matrix + /// to the LU matrix. This value is implicitly zero when the sparsity pattern differs. The Fill values + /// here do this implicit assignment + /// More detail in this issue: https://github.com/NCAR/micm/issues/625 class LuDecomposition { protected: diff --git a/include/micm/solver/lu_decomposition.inl b/include/micm/solver/lu_decomposition.inl index 2786202e3..331eae0fe 100644 --- a/include/micm/solver/lu_decomposition.inl +++ b/include/micm/solver/lu_decomposition.inl @@ -194,8 +194,12 @@ namespace micm // Upper trianglur matrix for (std::size_t iU = 0; iU < inLU.second; ++iU) { - if (*(do_aik++)) + if (*(do_aik++)){ U_vector[uik_nkj->first] = A_vector[*(aik++)]; + } + else { + U_vector[uik_nkj->first] = 0; + } for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj) { U_vector[uik_nkj->first] -= L_vector[lij_ujk->first] * U_vector[lij_ujk->second]; @@ -207,8 +211,12 @@ namespace micm L_vector[(lki_nkj++)->first] = 1.0; for (std::size_t iL = 0; iL < inLU.first; ++iL) { - if (*(do_aki++)) + if (*(do_aki++)){ L_vector[lki_nkj->first] = A_vector[*(aki++)]; + } + else { + L_vector[lki_nkj->first] = 0; + } for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj) { L_vector[lki_nkj->first] -= L_vector[lkj_uji->first] * U_vector[lkj_uji->second]; @@ -275,6 +283,9 @@ namespace micm std::copy(A_vector + *aik, A_vector + *aik + n_cells, U_vector + uik_nkj_first); ++aik; } + else { + std::fill(U_vector + uik_nkj_first, U_vector + uik_nkj_first + n_cells, 0); + } for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj) { const std::size_t lij_ujk_first = lij_ujk->first; @@ -297,6 +308,9 @@ namespace micm std::copy(A_vector + *aki, A_vector + *aki + n_cells, L_vector + lki_nkj_first); ++aki; } + else { + std::fill(L_vector + lki_nkj_first, L_vector + lki_nkj_first + n_cells, 0); + } for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj) { const std::size_t lkj_uji_first = lkj_uji->first; diff --git a/include/micm/solver/rosenbrock.inl b/include/micm/solver/rosenbrock.inl index 8f463cdc2..fd9f850a1 100644 --- a/include/micm/solver/rosenbrock.inl +++ b/include/micm/solver/rosenbrock.inl @@ -23,7 +23,7 @@ namespace micm const double h_max = parameters_.h_max_ == 0.0 ? time_step : std::min(time_step, parameters_.h_max_); const double h_start = parameters_.h_start_ == 0.0 ? std::max(parameters_.h_min_, DELTA_MIN) : std::min(h_max, parameters_.h_start_); - + SolverStats stats; double present_time = 0.0; @@ -243,6 +243,7 @@ namespace micm { double alpha = 1 / (H * gamma); static_cast(this)->AlphaMinusJacobian(state.jacobian_, alpha); + linear_solver_.Factor(state.jacobian_, state.lower_matrix_, state.upper_matrix_, singular); stats.decompositions_ += 1; diff --git a/include/micm/solver/solver_builder.inl b/include/micm/solver/solver_builder.inl index ee188134e..10677664a 100644 --- a/include/micm/solver/solver_builder.inl +++ b/include/micm/solver/solver_builder.inl @@ -386,7 +386,7 @@ namespace micm auto jacobian = BuildJacobian(nonzero_elements, this->number_of_grid_cells_, number_of_species); rates.SetJacobianFlatIds(jacobian); - LinearSolverPolicy linear_solver(jacobian, 1e-30); + LinearSolverPolicy linear_solver(jacobian, 0); std::vector variable_names{ number_of_species }; for (auto& species_pair : species_map) diff --git a/include/micm/util/vector_matrix.hpp b/include/micm/util/vector_matrix.hpp index 91171c694..dbc9c1a9c 100644 --- a/include/micm/util/vector_matrix.hpp +++ b/include/micm/util/vector_matrix.hpp @@ -228,6 +228,17 @@ namespace micm return L; } + void Print() const { + for (std::size_t i = 0; i < x_dim_; ++i) + { + for (std::size_t j = 0; j < y_dim_; ++j) + { + std::cout << (*this)[i][j] << " "; + } + std::cout << std::endl; + } + } + /// @brief Set every matrix element to a given value /// @param val Value to set each element to void Fill(T val) diff --git a/src/solver/lu_decomposition.cu b/src/solver/lu_decomposition.cu index 3941cd8c1..90301c1e1 100644 --- a/src/solver/lu_decomposition.cu +++ b/src/solver/lu_decomposition.cu @@ -56,19 +56,21 @@ namespace micm auto inLU = d_niLU[i]; for (size_t iU = 0; iU < inLU.second; ++iU) { + size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid; if (d_do_aik[do_aik_offset++]) { - size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid; size_t A_idx = d_aik[aik_offset++] + tid; d_U[U_idx] = d_A[A_idx]; } + else { + d_U[U_idx] = 0; + } for (size_t ikj = 0; ikj < d_uik_nkj[uik_nkj_offset].second; ++ikj) { - size_t U_idx_1 = d_uik_nkj[uik_nkj_offset].first + tid; size_t L_idx = d_lij_ujk[lij_ujk_offset].first + tid; size_t U_idx_2 = d_lij_ujk[lij_ujk_offset].second + tid; - d_U[U_idx_1] -= d_L[L_idx] * d_U[U_idx_2]; + d_U[U_idx] -= d_L[L_idx] * d_U[U_idx_2]; ++lij_ujk_offset; } ++uik_nkj_offset; @@ -79,12 +81,16 @@ namespace micm for (size_t iL = 0; iL < inLU.first; ++iL) { + size_t L_idx = d_lki_nkj[lki_nkj_offset].first + tid; if (d_do_aki[do_aki_offset++]) { - size_t L_idx = d_lki_nkj[lki_nkj_offset].first + tid; size_t A_idx = d_aki[aki_offset++] + tid; d_L[L_idx] = d_A[A_idx]; } + else { + d_L[L_idx] = 0; + } + for (size_t ikj = 0; ikj < d_lki_nkj[lki_nkj_offset].second; ++ikj) { size_t L_idx_1 = d_lki_nkj[lki_nkj_offset].first + tid; diff --git a/src/version.hpp.in b/src/version.hpp.in index 0cb11638d..45abc056c 100644 --- a/src/version.hpp.in +++ b/src/version.hpp.in @@ -1,3 +1,5 @@ +// Copyright (C) 2023-2024 National Center for Atmospheric Research +// SPDX-License-Identifier: Apache-2.0 // clang-format off #pragma once diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 8d68575cc..20a87f802 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -1,4 +1,4 @@ -if(MICM_ENABLE_JSON) +if(MICM_ENABLE_CONFIG_READER) add_subdirectory(configure) endif() if(MICM_ENABLE_CUDA) diff --git a/test/unit/configure/process/test_user_defined_config.cpp b/test/unit/configure/process/test_user_defined_config.cpp index ae2f02b40..ad37679c4 100644 --- a/test/unit/configure/process/test_user_defined_config.cpp +++ b/test/unit/configure/process/test_user_defined_config.cpp @@ -46,9 +46,9 @@ TEST(UserDefinedConfig, ParseConfig) // first reaction { EXPECT_EQ(process_vector[0].reactants_.size(), 3); - EXPECT_EQ(process_vector[0].reactants_[0].name_, "bar"); + EXPECT_EQ(process_vector[0].reactants_[0].name_, "foo"); EXPECT_EQ(process_vector[0].reactants_[1].name_, "bar"); - EXPECT_EQ(process_vector[0].reactants_[2].name_, "foo"); + EXPECT_EQ(process_vector[0].reactants_[2].name_, "bar"); EXPECT_EQ(process_vector[0].products_.size(), 2); EXPECT_EQ(process_vector[0].products_[0].first.name_, "baz"); EXPECT_EQ(process_vector[0].products_[0].second, 1.4); diff --git a/test/unit/cuda/solver/test_cuda_linear_solver.cpp b/test/unit/cuda/solver/test_cuda_linear_solver.cpp index 43430f566..81a4cf817 100644 --- a/test/unit/cuda/solver/test_cuda_linear_solver.cpp +++ b/test/unit/cuda/solver/test_cuda_linear_solver.cpp @@ -41,7 +41,7 @@ std::vector linearSolverGenerator(std::size_t number_of_blocks) auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine()); auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine()); - auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30); + auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(0); for (std::size_t i = 0; i < 10; ++i) for (std::size_t j = 0; j < 10; ++j) if (i == j || gen_bool()) @@ -66,9 +66,9 @@ std::vector linearSolverGenerator(std::size_t number_of_blocks) CopyToDeviceDense(b); CopyToDeviceDense(x); - LinearSolverPolicy solver = LinearSolverPolicy(A, 1.0e-30); + LinearSolverPolicy solver = LinearSolverPolicy(A, 0); std::pair lu = - micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + micm::LuDecomposition::GetLUMatrices(A, 0); SparseMatrixPolicy lower_matrix = std::move(lu.first); SparseMatrixPolicy upper_matrix = std::move(lu.second); @@ -138,3 +138,17 @@ TEST(CudaLinearSolver, RandomMatrixVectorOrderingForGPU) { verify_gpu_against_cpu(); } + +TEST(CudaLinearSolver, AgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto initial_value : initial_values) + { + testExtremeInitialValue>(1, initial_value); + testExtremeInitialValue>(20, initial_value); + testExtremeInitialValue>( + 300, initial_value); + testExtremeInitialValue>( + 4000, initial_value); + } +} \ No newline at end of file diff --git a/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp b/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp index 17bae734f..a8a57d541 100644 --- a/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp +++ b/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp @@ -20,7 +20,7 @@ void testCudaRandomMatrix(size_t n_grids) auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine()); auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine()); - auto builder = CPUSparseMatrixPolicy::Create(10).SetNumberOfBlocks(n_grids).InitialValue(1.0e-30); + auto builder = CPUSparseMatrixPolicy::Create(10).SetNumberOfBlocks(n_grids).InitialValue(0); for (std::size_t i = 0; i < 10; ++i) for (std::size_t j = 0; j < 10; ++j) if (i == j || gen_bool()) @@ -29,17 +29,24 @@ void testCudaRandomMatrix(size_t n_grids) CPUSparseMatrixPolicy cpu_A(builder); GPUSparseMatrixPolicy gpu_A(builder); + // for nvhpc, the lognormal distribution produces significantly different values + // for very large numbers of grid cells + // To keep the accuracy on the check results function small, we only generat 1 blocks worth of + // random values and then copy that into every other block for (std::size_t i = 0; i < 10; ++i) for (std::size_t j = 0; j < 10; ++j) - if (!cpu_A.IsZero(i, j)) - for (std::size_t i_block = 0; i_block < n_grids; ++i_block) + if (!cpu_A.IsZero(i, j)) { + cpu_A[0][i][j] = get_double(); + gpu_A[0][i][j] = cpu_A[0][i][j]; + for (std::size_t i_block = 1; i_block < n_grids; ++i_block) { - cpu_A[i_block][i][j] = get_double(); - gpu_A[i_block][i][j] = cpu_A[i_block][i][j]; + cpu_A[i_block][i][j] = cpu_A[0][i][j]; + gpu_A[i_block][i][j] = cpu_A[0][i][j]; } + } micm::CudaLuDecomposition gpu_lud(gpu_A); - auto gpu_LU = micm::CudaLuDecomposition::GetLUMatrices(gpu_A, 1.0e-30); + auto gpu_LU = micm::CudaLuDecomposition::GetLUMatrices(gpu_A, 0); gpu_A.CopyToDevice(); gpu_LU.first.CopyToDevice(); gpu_LU.second.CopyToDevice(); @@ -47,10 +54,10 @@ void testCudaRandomMatrix(size_t n_grids) gpu_LU.first.CopyToHost(); gpu_LU.second.CopyToHost(); check_results( - gpu_A, gpu_LU.first, gpu_LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-5); }); + gpu_A, gpu_LU.first, gpu_LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-10); }); micm::LuDecomposition cpu_lud = micm::LuDecomposition::Create(cpu_A); - auto cpu_LU = micm::LuDecomposition::GetLUMatrices(cpu_A, 1.0e-30); + auto cpu_LU = micm::LuDecomposition::GetLUMatrices(cpu_A, 0); bool singular{ false }; cpu_lud.Decompose(cpu_A, cpu_LU.first, cpu_LU.second, singular); @@ -65,13 +72,13 @@ void testCudaRandomMatrix(size_t n_grids) { auto gpu_L = gpu_L_vector[i]; auto cpu_L = cpu_L_vector[i]; - EXPECT_LT(std::abs((gpu_L - cpu_L) / cpu_L), 1.0e-5); + EXPECT_LT(std::abs((gpu_L-cpu_L)/cpu_L), 1.0e-10); }; for (int j = 0; j < U_size; ++j) { auto gpu_U = gpu_U_vector[j]; auto cpu_U = cpu_U_vector[j]; - EXPECT_LT(std::abs((gpu_U - cpu_U) / cpu_U), 1.0e-5); + EXPECT_LT(std::abs((gpu_U-cpu_U)/cpu_U), 1.0e-10); }; } @@ -91,4 +98,15 @@ TEST(CudaLuDecomposition, RandomMatrixVectorOrdering) testCudaRandomMatrix(100); testCudaRandomMatrix(1000); testCudaRandomMatrix(100000); +} + +TEST(CudaLuDecomposition, AgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto& value : initial_values) { + testExtremeValueInitialization(1, value); + testExtremeValueInitialization(100, value); + testExtremeValueInitialization(1000, value); + testExtremeValueInitialization(100000, value); + } } \ No newline at end of file diff --git a/test/unit/jit/solver/test_jit_linear_solver.cpp b/test/unit/jit/solver/test_jit_linear_solver.cpp index abeccee02..795b73152 100644 --- a/test/unit/jit/solver/test_jit_linear_solver.cpp +++ b/test/unit/jit/solver/test_jit_linear_solver.cpp @@ -42,4 +42,12 @@ TEST(JitLinearSolver, DiagonalMatrixVectorOrdering) testDiagonalMatrix>(2); testDiagonalMatrix>(3); testDiagonalMatrix>(4); +} + +TEST(JitLinearSolver, AgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, INFINITY }; + for(auto initial_value : initial_values) { + testExtremeInitialValue>(1, initial_value); + } } \ No newline at end of file diff --git a/test/unit/jit/solver/test_jit_lu_decomposition.cpp b/test/unit/jit/solver/test_jit_lu_decomposition.cpp index 455a39bf7..7901f4855 100644 --- a/test/unit/jit/solver/test_jit_lu_decomposition.cpp +++ b/test/unit/jit/solver/test_jit_lu_decomposition.cpp @@ -42,3 +42,14 @@ TEST(JitLuDecomposition, DiagonalMatrixVectorOrdering) testDiagonalMatrix>(3); testDiagonalMatrix>(4); } + +TEST(JitLuDecomposition, AgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto& value : initial_values) { + testExtremeValueInitialization>(1, value); + testExtremeValueInitialization>(2, value); + testExtremeValueInitialization>(3, value); + testExtremeValueInitialization>(4, value); + } +} \ No newline at end of file diff --git a/test/unit/solver/test_linear_solver.cpp b/test/unit/solver/test_linear_solver.cpp index c1268934c..4062220b9 100644 --- a/test/unit/solver/test_linear_solver.cpp +++ b/test/unit/solver/test_linear_solver.cpp @@ -35,6 +35,13 @@ TEST(LinearSolver, DiagonalMarkowitzReorder) testMarkowitzReordering, SparseMatrixTest>(); } +TEST(LinearSolver, StandardOrderingAgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto initial_value : initial_values) + testExtremeInitialValue>(5, initial_value); +} + using Group1VectorMatrix = micm::VectorMatrix; using Group2VectorMatrix = micm::VectorMatrix; using Group3VectorMatrix = micm::VectorMatrix; @@ -61,6 +68,17 @@ TEST(LinearSolver, RandomMatrixVectorOrdering) testRandomMatrix>(5); } +TEST(LinearSolver, VectorOrderingAgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto initial_value : initial_values) { + testExtremeInitialValue>(1, initial_value); + testExtremeInitialValue>(2, initial_value); + testExtremeInitialValue>(5, initial_value); + testExtremeInitialValue>(5, initial_value); + } +} + TEST(LinearSolver, DiagonalMatrixVectorOrdering) { testDiagonalMatrix>(5); diff --git a/test/unit/solver/test_linear_solver_policy.hpp b/test/unit/solver/test_linear_solver_policy.hpp index cc6bd31f3..a8db34e16 100644 --- a/test/unit/solver/test_linear_solver_policy.hpp +++ b/test/unit/solver/test_linear_solver_policy.hpp @@ -96,7 +96,7 @@ void testDenseMatrix() using FloatingPointType = typename MatrixPolicy::value_type; SparseMatrixPolicy A = SparseMatrixPolicy(SparseMatrixPolicy::Create(3) - .InitialValue(1.0e-30) + .InitialValue(0) .WithElement(0, 0) .WithElement(0, 1) .WithElement(0, 2) @@ -130,8 +130,8 @@ void testDenseMatrix() CopyToDeviceDense(b); CopyToDeviceDense(x); - LinearSolverPolicy solver = LinearSolverPolicy(A, 1.0e-30); - auto lu = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + LinearSolverPolicy solver = LinearSolverPolicy(A, 0); + auto lu = micm::LuDecomposition::GetLUMatrices(A, 0); auto lower_matrix = std::move(lu.first); auto upper_matrix = std::move(lu.second); bool is_singular = false; @@ -158,7 +158,7 @@ void testRandomMatrix(std::size_t number_of_blocks) auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine()); auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine()); - auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30); + auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(0); for (std::size_t i = 0; i < 10; ++i) for (std::size_t j = 0; j < 10; ++j) if (i == j || gen_bool()) @@ -184,8 +184,8 @@ void testRandomMatrix(std::size_t number_of_blocks) CopyToDeviceSparse(A); CopyToDeviceDense(x); - LinearSolverPolicy solver = LinearSolverPolicy(A, 1.0e-30); - auto lu = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + LinearSolverPolicy solver = LinearSolverPolicy(A, 0); + auto lu = micm::LuDecomposition::GetLUMatrices(A, 0); auto lower_matrix = std::move(lu.first); auto upper_matrix = std::move(lu.second); bool is_singular = false; @@ -201,7 +201,75 @@ void testRandomMatrix(std::size_t number_of_blocks) CopyToHostDense(x); check_results( - A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-5); }); + A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 1.0e-6); }); +} + +template +void testExtremeInitialValue(std::size_t number_of_blocks, double initial_value) +{ + using FloatingPointType = typename MatrixPolicy::value_type; + + const unsigned int seed = 12345; + std::mt19937 generator(seed); + const double point_five = 0.5; + const double two = 2.0; + + auto gen_bool = std::bind(std::bernoulli_distribution(point_five), generator); + auto get_double = std::bind(std::lognormal_distribution(-two, two), generator); + const size_t size = 30; + + auto builder = SparseMatrixPolicy::Create(size).SetNumberOfBlocks(number_of_blocks).InitialValue(0); + for (std::size_t i = 0; i < size; ++i) { + for (std::size_t j = 0; j < size; ++j) { + if (i == j || gen_bool()) { + builder = builder.WithElement(i, j); + } + } + } + + SparseMatrixPolicy A(builder); + MatrixPolicy b(number_of_blocks, size, 0.0); + MatrixPolicy x(number_of_blocks, size, 0.0); + + for (std::size_t i = 0; i < size; ++i) + for (std::size_t j = 0; j < size; ++j) + if (!A.IsZero(i, j)) + for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block) + A[i_block][i][j] = get_double(); + + for (std::size_t i = 0; i < size; ++i) + for (std::size_t i_block = 0; i_block < number_of_blocks; ++i_block) + b[i_block][i] = get_double(); + + x = b; + + // Only copy the data to the device when it is a CudaMatrix + CopyToDeviceSparse(A); + CopyToDeviceDense(x); + + LinearSolverPolicy solver = LinearSolverPolicy(A, initial_value); + auto lu = micm::LuDecomposition::GetLUMatrices(A, initial_value); + auto lower_matrix = std::move(lu.first); + auto upper_matrix = std::move(lu.second); + bool is_singular = false; + + // Only copy the data to the device when it is a CudaMatrix + CopyToDeviceSparse(lower_matrix); + CopyToDeviceSparse(upper_matrix); + + solver.Factor(A, lower_matrix, upper_matrix, is_singular); + + // Only copy the data to the host when it is a CudaMatrix + CopyToHostDense(lower_matrix); + CopyToHostDense(upper_matrix); + + solver.template Solve(x, lower_matrix, upper_matrix); + + // Only copy the data to the host when it is a CudaMatrix + CopyToHostDense(x); + + check_results( + A, b, x, [&](const FloatingPointType a, const FloatingPointType b) -> void { EXPECT_NEAR(a, b, 2.0e-06); }); } template @@ -211,7 +279,7 @@ void testDiagonalMatrix(std::size_t number_of_blocks) auto get_double = std::bind(std::lognormal_distribution(-2.0, 4.0), std::default_random_engine()); - auto builder = SparseMatrixPolicy::Create(6).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30); + auto builder = SparseMatrixPolicy::Create(6).SetNumberOfBlocks(number_of_blocks).InitialValue(0); for (std::size_t i = 0; i < 6; ++i) builder = builder.WithElement(i, i); @@ -233,8 +301,8 @@ void testDiagonalMatrix(std::size_t number_of_blocks) CopyToDeviceSparse(A); CopyToDeviceDense(x); - LinearSolverPolicy solver = LinearSolverPolicy(A, 1.0e-30); - auto lu = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + LinearSolverPolicy solver = LinearSolverPolicy(A, 0); + auto lu = micm::LuDecomposition::GetLUMatrices(A, 0); auto lower_matrix = std::move(lu.first); auto upper_matrix = std::move(lu.second); bool is_singular = false; @@ -298,4 +366,4 @@ void testMarkowitzReordering() EXPECT_GT( orig_LU.first.RowIdsVector().size() + orig_LU.second.RowIdsVector().size(), reordered_LU.first.RowIdsVector().size() + reordered_LU.second.RowIdsVector().size()); -} \ No newline at end of file +} diff --git a/test/unit/solver/test_lu_decomposition.cpp b/test/unit/solver/test_lu_decomposition.cpp index 577ac567e..fff36d369 100644 --- a/test/unit/solver/test_lu_decomposition.cpp +++ b/test/unit/solver/test_lu_decomposition.cpp @@ -25,6 +25,7 @@ TEST(LuDecomposition, SingularMatrixStandardOrdering) TEST(LuDecomposition, RandomMatrixStandardOrdering) { + testRandomMatrix(1); testRandomMatrix(5); } @@ -33,6 +34,14 @@ TEST(LuDecomposition, DiagonalMatrixStandardOrdering) testDiagonalMatrix(5); } +TEST(LuDecomposition, AgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto& value : initial_values) { + testExtremeValueInitialization(5, value); + } +} + TEST(LuDecomposition, DenseMatrixVectorOrdering) { testDenseMatrix(); @@ -64,3 +73,14 @@ TEST(LuDecomposition, DiagonalMatrixVectorOrdering) testDiagonalMatrix(5); testDiagonalMatrix(5); } + +TEST(LuDecomposition, VectorOrderingAgnosticToInitialValue) +{ + double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY }; + for(auto& value : initial_values) { + testExtremeValueInitialization(5, value); + testExtremeValueInitialization(5, value); + testExtremeValueInitialization(5, value); + testExtremeValueInitialization(5, value); + } +} \ No newline at end of file diff --git a/test/unit/solver/test_lu_decomposition_policy.hpp b/test/unit/solver/test_lu_decomposition_policy.hpp index 041282fee..b62838652 100644 --- a/test/unit/solver/test_lu_decomposition_policy.hpp +++ b/test/unit/solver/test_lu_decomposition_policy.hpp @@ -7,6 +7,28 @@ #include #include +template +void CopyToDevice(MatrixPolicy& matrix) +{ + if constexpr (requires { + { + matrix.CopyToDevice() + } -> std::same_as; + }) + matrix.CopyToDevice(); +} + +template +void CopyToHost(MatrixPolicy& matrix) +{ + if constexpr (requires { + { + matrix.CopyToHost() + } -> std::same_as; + }) + matrix.CopyToHost(); +} + template void check_results( const SparseMatrixPolicy& A, @@ -77,7 +99,7 @@ template void testDenseMatrix() { SparseMatrixPolicy A = SparseMatrixPolicy(SparseMatrixPolicy::Create(3) - .InitialValue(1.0e-30) + .InitialValue(0) .WithElement(0, 0) .WithElement(0, 1) .WithElement(0, 2) @@ -99,18 +121,18 @@ void testDenseMatrix() A[0][2][2] = 8; LuDecompositionPolicy lud = LuDecompositionPolicy(A); - auto LU = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + auto LU = micm::LuDecomposition::GetLUMatrices(A, 0); bool is_singular{ false }; lud.template Decompose(A, LU.first, LU.second, is_singular); check_results( - A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-5); }); + A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-10); }); } template void testSingularMatrix() { SparseMatrixPolicy A = SparseMatrixPolicy( - SparseMatrixPolicy::Create(2).InitialValue(1.0e-30).WithElement(0, 0).WithElement(0, 1).WithElement(1, 0).WithElement( + SparseMatrixPolicy::Create(2).InitialValue(0).WithElement(0, 0).WithElement(0, 1).WithElement(1, 0).WithElement( 1, 1)); A[0][0][0] = 0; @@ -134,7 +156,7 @@ void testRandomMatrix(std::size_t number_of_blocks) auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine()); auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine()); - auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30); + auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(0); for (std::size_t i = 0; i < 10; ++i) for (std::size_t j = 0; j < 10; ++j) if (i == j || gen_bool()) @@ -149,11 +171,61 @@ void testRandomMatrix(std::size_t number_of_blocks) A[i_block][i][j] = get_double(); LuDecompositionPolicy lud = LuDecompositionPolicy(A); - auto LU = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + auto LU = micm::LuDecomposition::GetLUMatrices(A, 0); + bool is_singular{ false }; + lud.template Decompose(A, LU.first, LU.second, is_singular); + check_results( + A, LU.first, LU.second, [&](const double a, const double b) -> void { + EXPECT_NEAR(a, b, 1.0e-9); + }); +} + +template +void testExtremeValueInitialization(std::size_t number_of_blocks, double initial_value) +{ + auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine()); + auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine()); + auto size = 10; + + auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(initial_value); + for (std::size_t i = 0; i < size; ++i) + for (std::size_t j = 0; j < size; ++j) + if (i == j || gen_bool()) + builder = builder.WithElement(i, j); + + SparseMatrixPolicy A(builder); + + // for nvhpc, the lognormal distribution produces significantly different values + // for very large numbers of grid cells + // To keep the accuracy on the check results function small, we only generat 1 blocks worth of + // random values and then copy that into every other block + for (std::size_t i = 0; i < size; ++i) + for (std::size_t j = 0; j < size; ++j) + if (!A.IsZero(i, j)){ + A[0][i][j] = get_double(); + for (std::size_t i_block = 1; i_block < number_of_blocks; ++i_block) + A[i_block][i][j] = A[0][i][j]; + } + + LuDecompositionPolicy lud = LuDecompositionPolicy(A); + + auto LU = micm::LuDecomposition::GetLUMatrices(A, initial_value); + + CopyToDevice(A); + CopyToDevice(LU.first); + CopyToDevice(LU.second); + bool is_singular{ false }; + lud.template Decompose(A, LU.first, LU.second, is_singular); + + CopyToHost(LU.first); + CopyToHost(LU.second); + check_results( - A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-5); }); + A, LU.first, LU.second, [&](const double a, const double b) -> void { + EXPECT_NEAR(a, b, 1.0e-09); + }); } template @@ -161,7 +233,7 @@ void testDiagonalMatrix(std::size_t number_of_blocks) { auto get_double = std::bind(std::lognormal_distribution(-2.0, 4.0), std::default_random_engine()); - auto builder = SparseMatrixPolicy::Create(6).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30); + auto builder = SparseMatrixPolicy::Create(6).SetNumberOfBlocks(number_of_blocks).InitialValue(0); for (std::size_t i = 0; i < 6; ++i) builder = builder.WithElement(i, i); @@ -172,9 +244,9 @@ void testDiagonalMatrix(std::size_t number_of_blocks) A[i_block][i][i] = get_double(); LuDecompositionPolicy lud = LuDecompositionPolicy(A); - auto LU = micm::LuDecomposition::GetLUMatrices(A, 1.0e-30); + auto LU = micm::LuDecomposition::GetLUMatrices(A, 0); bool is_singular{ false }; lud.template Decompose(A, LU.first, LU.second, is_singular); check_results( - A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-5); }); + A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-10); }); } \ No newline at end of file