NCAR · K20shores · Sep 24, 2024 · Sep 20, 2024 · Sep 20, 2024 · Sep 20, 2024
diff --git a/include/micm/jit/solver/jit_lu_decomposition.inl b/include/micm/jit/solver/jit_lu_decomposition.inl
@@ -89,6 +89,15 @@ namespace micm
           func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, A_val);
           func.EndLoop(loop);
         }
+        else {
+          auto loop = func.StartLoop("Uik_eq_zero_loop", 0, L);
+          llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
+          llvm::Value *iUf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, uik_nkj->first));
+          llvm::Value *U_ptr_index[1];
+          U_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iUf);
+          func.SetArrayElement(func.arguments_[2], U_ptr_index, JitType::Double, zero_val);
+          func.EndLoop(loop);
+        }
         for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
         {
           auto loop = func.StartLoop("Uik_seq_Lij_Ujk_loop", 0, L);
@@ -137,6 +146,15 @@ namespace micm
           func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, A_val);
           func.EndLoop(loop);
         }
+        else {
+          auto loop = func.StartLoop("Lki_eq_zero_loop", 0, L);
+          llvm::Value *zero_val = llvm::ConstantFP::get(*(func.context_), llvm::APFloat(0.0));
+          llvm::Value *iLf = llvm::ConstantInt::get(*(func.context_), llvm::APInt(64, lki_nkj->first));
+          llvm::Value *L_ptr_index[1];
+          L_ptr_index[0] = func.builder_->CreateNSWAdd(loop.index_, iLf);
+          func.SetArrayElement(func.arguments_[1], L_ptr_index, JitType::Double, zero_val);
+          func.EndLoop(loop);
+        }
         for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
         {
           auto loop = func.StartLoop("Lki_seq_Lkj_Uji_loop", 0, L);

diff --git a/include/micm/solver/lu_decomposition.hpp b/include/micm/solver/lu_decomposition.hpp
@@ -43,6 +43,13 @@ namespace micm
   /// For the sparse matrix algorithm, the indices of non-zero terms are stored in
   /// several arrays during construction. These arrays are iterated through during
   /// calls to Decompose to do the actual decomposition.
+  /// Our LU Decomposition only assigns the values of the jacobian to the LU matrices
+  /// when the *jacobian* is nonzero. However, the sparsity pattern of the jacobian doesn't
+  /// necessarily match that of the LU matrices. There can be more nonzero elements in the LU matrices
+  /// than in the jacobian. When this happens, we still need to assign the value of the jacobian matrix
+  /// to the LU matrix. This value is implicitly zero when the sparsity pattern differs. The Fill values
+  /// here do this implicit assignment
+  /// More detail in this issue: https://github.com/NCAR/micm/issues/625
   class LuDecomposition
   {
    protected:

diff --git a/include/micm/solver/lu_decomposition.inl b/include/micm/solver/lu_decomposition.inl
@@ -194,8 +194,12 @@ namespace micm
         // Upper trianglur matrix
         for (std::size_t iU = 0; iU < inLU.second; ++iU)
         {
-          if (*(do_aik++))
+          if (*(do_aik++)){
             U_vector[uik_nkj->first] = A_vector[*(aik++)];
+          }
+          else {
+            U_vector[uik_nkj->first] = 0;
+          }
           for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
           {
             U_vector[uik_nkj->first] -= L_vector[lij_ujk->first] * U_vector[lij_ujk->second];
@@ -207,8 +211,12 @@ namespace micm
         L_vector[(lki_nkj++)->first] = 1.0;
         for (std::size_t iL = 0; iL < inLU.first; ++iL)
         {
-          if (*(do_aki++))
+          if (*(do_aki++)){
             L_vector[lki_nkj->first] = A_vector[*(aki++)];
+          }
+          else {
+            L_vector[lki_nkj->first] = 0;
+          }
           for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
           {
             L_vector[lki_nkj->first] -= L_vector[lkj_uji->first] * U_vector[lkj_uji->second];
@@ -275,6 +283,9 @@ namespace micm
             std::copy(A_vector + *aik, A_vector + *aik + n_cells, U_vector + uik_nkj_first);
             ++aik;
           }
+          else {
+            std::fill(U_vector + uik_nkj_first, U_vector + uik_nkj_first + n_cells, 0);
+          }
           for (std::size_t ikj = 0; ikj < uik_nkj->second; ++ikj)
           {
             const std::size_t lij_ujk_first = lij_ujk->first;
@@ -297,6 +308,9 @@ namespace micm
             std::copy(A_vector + *aki, A_vector + *aki + n_cells, L_vector + lki_nkj_first);
             ++aki;
           }
+          else {
+            std::fill(L_vector + lki_nkj_first, L_vector + lki_nkj_first + n_cells, 0);
+          }
           for (std::size_t ikj = 0; ikj < lki_nkj->second; ++ikj)
           {
             const std::size_t lkj_uji_first = lkj_uji->first;

diff --git a/include/micm/solver/rosenbrock.inl b/include/micm/solver/rosenbrock.inl
@@ -23,7 +23,7 @@ namespace micm
     const double h_max = parameters_.h_max_ == 0.0 ? time_step : std::min(time_step, parameters_.h_max_);
     const double h_start =
         parameters_.h_start_ == 0.0 ? std::max(parameters_.h_min_, DELTA_MIN) : std::min(h_max, parameters_.h_start_);
-
+    
     SolverStats stats;
 
     double present_time = 0.0;
@@ -243,6 +243,7 @@ namespace micm
     {
       double alpha = 1 / (H * gamma);
       static_cast<const Derived*>(this)->AlphaMinusJacobian(state.jacobian_, alpha);
+
       linear_solver_.Factor(state.jacobian_, state.lower_matrix_, state.upper_matrix_, singular);
       stats.decompositions_ += 1;
 

diff --git a/include/micm/solver/solver_builder.inl b/include/micm/solver/solver_builder.inl
@@ -386,7 +386,7 @@ namespace micm
     auto jacobian = BuildJacobian<SparseMatrixPolicy>(nonzero_elements, this->number_of_grid_cells_, number_of_species);
 
     rates.SetJacobianFlatIds(jacobian);
-    LinearSolverPolicy linear_solver(jacobian, 1e-30);
+    LinearSolverPolicy linear_solver(jacobian, 0);
 
     std::vector<std::string> variable_names{ number_of_species };
     for (auto& species_pair : species_map)

diff --git a/include/micm/util/vector_matrix.hpp b/include/micm/util/vector_matrix.hpp
@@ -228,6 +228,17 @@ namespace micm
       return L;
     }
 
+    void print() const {
+      for (std::size_t i = 0; i < x_dim_; ++i)
+      {
+        for (std::size_t j = 0; j < y_dim_; ++j)
+        {
+          std::cout << (*this)[i][j] << " ";
+        }
+        std::cout << std::endl;
+      }
+    }
+
     /// @brief Set every matrix element to a given value
     /// @param val Value to set each element to
     void Fill(T val)

diff --git a/src/solver/lu_decomposition.cu b/src/solver/lu_decomposition.cu
@@ -56,19 +56,21 @@ namespace micm
           auto inLU = d_niLU[i];
           for (size_t iU = 0; iU < inLU.second; ++iU)
           {
+            size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid;
             if (d_do_aik[do_aik_offset++])
             {
-              size_t U_idx = d_uik_nkj[uik_nkj_offset].first + tid;
               size_t A_idx = d_aik[aik_offset++] + tid;
               d_U[U_idx] = d_A[A_idx];
             }
+            else {
+              d_U[U_idx] = 0;
+            }
 
             for (size_t ikj = 0; ikj < d_uik_nkj[uik_nkj_offset].second; ++ikj)
             {
-              size_t U_idx_1 = d_uik_nkj[uik_nkj_offset].first + tid;
               size_t L_idx = d_lij_ujk[lij_ujk_offset].first + tid;
               size_t U_idx_2 = d_lij_ujk[lij_ujk_offset].second + tid;
-              d_U[U_idx_1] -= d_L[L_idx] * d_U[U_idx_2];
+              d_U[U_idx] -= d_L[L_idx] * d_U[U_idx_2];
               ++lij_ujk_offset;
             }
             ++uik_nkj_offset;
@@ -85,6 +87,11 @@ namespace micm
               size_t A_idx = d_aki[aki_offset++] + tid;
               d_L[L_idx] = d_A[A_idx];
             }
+            else {
+              size_t L_idx = d_lki_nkj[lki_nkj_offset].first + tid;
+              d_L[L_idx] = 0;
+            }
+
             for (size_t ikj = 0; ikj < d_lki_nkj[lki_nkj_offset].second; ++ikj)
             {
               size_t L_idx_1 = d_lki_nkj[lki_nkj_offset].first + tid;

diff --git a/src/version.hpp.in b/src/version.hpp.in
@@ -1,3 +1,5 @@
+// Copyright (C) 2023-2024 National Center for Atmospheric Research
+// SPDX-License-Identifier: Apache-2.0
 // clang-format off
 #pragma once
 

diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(MICM_ENABLE_JSON)
+if(MICM_ENABLE_CONFIG_READER)
   add_subdirectory(configure)
 endif()
 if(MICM_ENABLE_CUDA)

diff --git a/test/unit/configure/process/test_user_defined_config.cpp b/test/unit/configure/process/test_user_defined_config.cpp
@@ -46,9 +46,9 @@ TEST(UserDefinedConfig, ParseConfig)
   // first reaction
   {
     EXPECT_EQ(process_vector[0].reactants_.size(), 3);
-    EXPECT_EQ(process_vector[0].reactants_[0].name_, "bar");
+    EXPECT_EQ(process_vector[0].reactants_[0].name_, "foo");
     EXPECT_EQ(process_vector[0].reactants_[1].name_, "bar");
-    EXPECT_EQ(process_vector[0].reactants_[2].name_, "foo");
+    EXPECT_EQ(process_vector[0].reactants_[2].name_, "bar");
     EXPECT_EQ(process_vector[0].products_.size(), 2);
     EXPECT_EQ(process_vector[0].products_[0].first.name_, "baz");
     EXPECT_EQ(process_vector[0].products_[0].second, 1.4);

diff --git a/test/unit/cuda/solver/test_cuda_linear_solver.cpp b/test/unit/cuda/solver/test_cuda_linear_solver.cpp
@@ -41,7 +41,7 @@ std::vector<double> linearSolverGenerator(std::size_t number_of_blocks)
   auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine());
   auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine());
 
-  auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(1.0e-30);
+  auto builder = SparseMatrixPolicy::Create(10).SetNumberOfBlocks(number_of_blocks).InitialValue(0);
   for (std::size_t i = 0; i < 10; ++i)
     for (std::size_t j = 0; j < 10; ++j)
       if (i == j || gen_bool())
@@ -66,9 +66,9 @@ std::vector<double> linearSolverGenerator(std::size_t number_of_blocks)
   CopyToDeviceDense<MatrixPolicy>(b);
   CopyToDeviceDense<MatrixPolicy>(x);
 
-  LinearSolverPolicy solver = LinearSolverPolicy(A, 1.0e-30);
+  LinearSolverPolicy solver = LinearSolverPolicy(A, 0);
   std::pair<SparseMatrixPolicy, SparseMatrixPolicy> lu =
-      micm::LuDecomposition::GetLUMatrices<SparseMatrixPolicy>(A, 1.0e-30);
+      micm::LuDecomposition::GetLUMatrices<SparseMatrixPolicy>(A, 0);
   SparseMatrixPolicy lower_matrix = std::move(lu.first);
   SparseMatrixPolicy upper_matrix = std::move(lu.second);
 
@@ -138,3 +138,17 @@ TEST(CudaLinearSolver, RandomMatrixVectorOrderingForGPU)
 {
   verify_gpu_against_cpu();
 }
+
+TEST(CudaLinearSolver, AgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
+  for(auto initial_value : initial_values)
+  {
+    testExtremeInitialValue<Group1CudaDenseMatrix, Group1CudaSparseMatrix, micm::CudaLinearSolver<Group1CudaSparseMatrix>>(1, initial_value);
+    testExtremeInitialValue<Group20CudaDenseMatrix, Group20CudaSparseMatrix, micm::CudaLinearSolver<Group20CudaSparseMatrix>>(20, initial_value);
+    testExtremeInitialValue<Group300CudaDenseMatrix, Group300CudaSparseMatrix, micm::CudaLinearSolver<Group300CudaSparseMatrix>>(
+        300, initial_value);
+    testExtremeInitialValue<Group4000CudaDenseMatrix, Group4000CudaSparseMatrix, micm::CudaLinearSolver<Group4000CudaSparseMatrix>>(
+        4000, initial_value);
+  }
+}
diff --git a/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp b/test/unit/cuda/solver/test_cuda_lu_decomposition.cpp
@@ -20,7 +20,7 @@ void testCudaRandomMatrix(size_t n_grids)
   auto gen_bool = std::bind(std::uniform_int_distribution<>(0, 1), std::default_random_engine());
   auto get_double = std::bind(std::lognormal_distribution(-2.0, 2.0), std::default_random_engine());
 
-  auto builder = CPUSparseMatrixPolicy::Create(10).SetNumberOfBlocks(n_grids).InitialValue(1.0e-30);
+  auto builder = CPUSparseMatrixPolicy::Create(10).SetNumberOfBlocks(n_grids).InitialValue(0);
   for (std::size_t i = 0; i < 10; ++i)
     for (std::size_t j = 0; j < 10; ++j)
       if (i == j || gen_bool())
@@ -29,28 +29,35 @@ void testCudaRandomMatrix(size_t n_grids)
   CPUSparseMatrixPolicy cpu_A(builder);
   GPUSparseMatrixPolicy gpu_A(builder);
 
+  // for nvhpc, the lognormal distribution produces significantly different values 
+  // for very large numbers of grid cells
+  // To keep the accuracy on the check results function small, we only generat 1 blocks worth of
+  // random values and then copy that into every other block
   for (std::size_t i = 0; i < 10; ++i)
     for (std::size_t j = 0; j < 10; ++j)
-      if (!cpu_A.IsZero(i, j))
-        for (std::size_t i_block = 0; i_block < n_grids; ++i_block)
+      if (!cpu_A.IsZero(i, j)) {
+        cpu_A[0][i][j] = get_double();
+        gpu_A[0][i][j] = cpu_A[0][i][j];
+        for (std::size_t i_block = 1; i_block < n_grids; ++i_block)
         {
-          cpu_A[i_block][i][j] = get_double();
-          gpu_A[i_block][i][j] = cpu_A[i_block][i][j];
+          cpu_A[i_block][i][j] = cpu_A[0][i][j];
+          gpu_A[i_block][i][j] = cpu_A[0][i][j];
         }
+      }  
 
   micm::CudaLuDecomposition gpu_lud(gpu_A);
-  auto gpu_LU = micm::CudaLuDecomposition::GetLUMatrices(gpu_A, 1.0e-30);
+  auto gpu_LU = micm::CudaLuDecomposition::GetLUMatrices(gpu_A, 0);
   gpu_A.CopyToDevice();
   gpu_LU.first.CopyToDevice();
   gpu_LU.second.CopyToDevice();
   gpu_lud.Decompose<GPUSparseMatrixPolicy>(gpu_A, gpu_LU.first, gpu_LU.second);
   gpu_LU.first.CopyToHost();
   gpu_LU.second.CopyToHost();
   check_results<typename GPUSparseMatrixPolicy::value_type, GPUSparseMatrixPolicy>(
-      gpu_A, gpu_LU.first, gpu_LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-5); });
+      gpu_A, gpu_LU.first, gpu_LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-10); });
 
   micm::LuDecomposition cpu_lud = micm::LuDecomposition::Create<CPUSparseMatrixPolicy>(cpu_A);
-  auto cpu_LU = micm::LuDecomposition::GetLUMatrices<CPUSparseMatrixPolicy>(cpu_A, 1.0e-30);
+  auto cpu_LU = micm::LuDecomposition::GetLUMatrices<CPUSparseMatrixPolicy>(cpu_A, 0);
   bool singular{ false };
   cpu_lud.Decompose<CPUSparseMatrixPolicy>(cpu_A, cpu_LU.first, cpu_LU.second, singular);
 
@@ -65,13 +72,13 @@ void testCudaRandomMatrix(size_t n_grids)
   {
     auto gpu_L = gpu_L_vector[i];
     auto cpu_L = cpu_L_vector[i];
-    EXPECT_LT(std::abs((gpu_L - cpu_L) / cpu_L), 1.0e-5);
+    EXPECT_LT(std::abs((gpu_L-cpu_L)/cpu_L), 1.0e-10);
   };
   for (int j = 0; j < U_size; ++j)
   {
     auto gpu_U = gpu_U_vector[j];
     auto cpu_U = cpu_U_vector[j];
-    EXPECT_LT(std::abs((gpu_U - cpu_U) / cpu_U), 1.0e-5);
+    EXPECT_LT(std::abs((gpu_U-cpu_U)/cpu_U), 1.0e-10);
   };
 }
 
@@ -91,4 +98,15 @@ TEST(CudaLuDecomposition, RandomMatrixVectorOrdering)
   testCudaRandomMatrix<Group100CPUSparseVectorMatrix, Group100CudaSparseMatrix>(100);
   testCudaRandomMatrix<Group1000CPUSparseVectorMatrix, Group1000CudaSparseMatrix>(1000);
   testCudaRandomMatrix<Group100000CPUSparseVectorMatrix, Group100000CudaSparseMatrix>(100000);
+}
+
+TEST(CudaLuDecomposition, AgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
+  for(auto& value : initial_values) {
+    testExtremeValueInitialization<Group1CudaSparseMatrix, micm::CudaLuDecomposition>(1, value);
+    testExtremeValueInitialization<Group100CudaSparseMatrix, micm::CudaLuDecomposition>(100, value);
+    testExtremeValueInitialization<Group1000CudaSparseMatrix, micm::CudaLuDecomposition>(1000, value);
+    testExtremeValueInitialization<Group100000CudaSparseMatrix, micm::CudaLuDecomposition>(100000, value);
+  }
 }
diff --git a/test/unit/jit/solver/test_jit_linear_solver.cpp b/test/unit/jit/solver/test_jit_linear_solver.cpp
@@ -42,4 +42,12 @@ TEST(JitLinearSolver, DiagonalMatrixVectorOrdering)
   testDiagonalMatrix<Group2VectorMatrix, Group2SparseVectorMatrix, micm::JitLinearSolver<2, Group2SparseVectorMatrix>>(2);
   testDiagonalMatrix<Group3VectorMatrix, Group3SparseVectorMatrix, micm::JitLinearSolver<3, Group3SparseVectorMatrix>>(3);
   testDiagonalMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::JitLinearSolver<4, Group4SparseVectorMatrix>>(4);
+}
+
+TEST(JitLinearSolver, AgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, INFINITY };
+  for(auto initial_value : initial_values) {
+    testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::JitLinearSolver<1, Group1SparseVectorMatrix>>(1, initial_value);
+  }
 }
diff --git a/test/unit/jit/solver/test_jit_lu_decomposition.cpp b/test/unit/jit/solver/test_jit_lu_decomposition.cpp
@@ -42,3 +42,14 @@ TEST(JitLuDecomposition, DiagonalMatrixVectorOrdering)
   testDiagonalMatrix<Group3SparseVectorMatrix, micm::JitLuDecomposition<3>>(3);
   testDiagonalMatrix<Group4SparseVectorMatrix, micm::JitLuDecomposition<4>>(4);
 }
+
+TEST(JitLuDecomposition, AgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
+  for(auto& value : initial_values) {
+    testExtremeValueInitialization<Group1SparseVectorMatrix, micm::JitLuDecomposition<1>>(1, value);
+    testExtremeValueInitialization<Group2SparseVectorMatrix, micm::JitLuDecomposition<2>>(2, value);
+    testExtremeValueInitialization<Group3SparseVectorMatrix, micm::JitLuDecomposition<3>>(3, value);
+    testExtremeValueInitialization<Group4SparseVectorMatrix, micm::JitLuDecomposition<4>>(4, value);
+  }
+}
diff --git a/test/unit/solver/test_linear_solver.cpp b/test/unit/solver/test_linear_solver.cpp
@@ -35,6 +35,13 @@ TEST(LinearSolver, DiagonalMarkowitzReorder)
   testMarkowitzReordering<micm::Matrix<int>, SparseMatrixTest>();
 }
 
+TEST(LinearSolver, StandardOrderingAgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
+  for(auto initial_value : initial_values)
+    testExtremeInitialValue<DenseMatrixTest, SparseMatrixTest, micm::LinearSolver<SparseMatrixTest>>(5, initial_value);
+}
+
 using Group1VectorMatrix = micm::VectorMatrix<FloatingPointType, 1>;
 using Group2VectorMatrix = micm::VectorMatrix<FloatingPointType, 2>;
 using Group3VectorMatrix = micm::VectorMatrix<FloatingPointType, 3>;
@@ -61,6 +68,17 @@ TEST(LinearSolver, RandomMatrixVectorOrdering)
   testRandomMatrix<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5);
 }
 
+TEST(LinearSolver, VectorOrderingAgnosticToInitialValue)
+{
+  double initial_values[5] = { -INFINITY, -1.0, 0.0, 1.0, INFINITY };
+  for(auto initial_value : initial_values) {
+    testExtremeInitialValue<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(1, initial_value);
+    testExtremeInitialValue<Group2VectorMatrix, Group2SparseVectorMatrix, micm::LinearSolver<Group2SparseVectorMatrix>>(2, initial_value);
+    testExtremeInitialValue<Group3VectorMatrix, Group3SparseVectorMatrix, micm::LinearSolver<Group3SparseVectorMatrix>>(5, initial_value);
+    testExtremeInitialValue<Group4VectorMatrix, Group4SparseVectorMatrix, micm::LinearSolver<Group4SparseVectorMatrix>>(5, initial_value);
+  }
+}
+
 TEST(LinearSolver, DiagonalMatrixVectorOrdering)
 {
   testDiagonalMatrix<Group1VectorMatrix, Group1SparseVectorMatrix, micm::LinearSolver<Group1SparseVectorMatrix>>(5);