kokkos · lucbv · Nov 4, 2024 · Jun 4, 2024 · Aug 14, 2024 · Sep 3, 2024
diff --git a/ode/impl/KokkosODE_BDF_impl.hpp b/ode/impl/KokkosODE_BDF_impl.hpp
@@ -142,7 +142,7 @@ struct BDF_system_wrapper2 {
     if (compute_jac) {
       mySys.evaluate_jacobian(t, dt, y, jac);
 
-      // J = I - dt*(dy/dy)
+      // J = I - dt*(df/dy)
       for (int rowIdx = 0; rowIdx < neqs; ++rowIdx) {
         for (int colIdx = 0; colIdx < neqs; ++colIdx) {
           jac(rowIdx, colIdx) = -dt * jac(rowIdx, colIdx);

diff --git a/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp b/ode/impl/KokkosODE_RungeKuttaTables_impl.hpp
@@ -257,6 +257,59 @@ struct ButcherTableau<4, 6>  // Referred to as DOPRI5 or RKDP
                                     11.0 / 84.0 - 187.0 / 2100.0, -1.0 / 40.0}};
 };
 
+// Coefficients obtained from:
+// J. H. Verner
+// "Explicit Runge-Kutta methods with estimates of the local truncation error",
+// Journal of Numerical Analysis, Volume 15, Issue 4, 1978,
+// https://doi.org/10.1137/0715051.
+template <>
+struct ButcherTableau<5, 7>  // Referred to as Verner 5-6 or VER56
+{
+  static constexpr int order   = 6;
+  static constexpr int nstages = 8;
+  Kokkos::Array<double, (nstages * nstages + nstages) / 2> a{{0.0,
+                                                              1.0 / 6.0,
+                                                              0.0,
+                                                              4.0 / 75.0,
+                                                              16.0 / 75.0,
+                                                              0.0,
+                                                              5.0 / 6.0,
+                                                              -8.0 / 3.0,
+                                                              5.0 / 2.0,
+                                                              0.0,
+                                                              -165.0 / 64.0,
+                                                              55.0 / 6.0,
+                                                              -425.0 / 64.0,
+                                                              85.0 / 96.0,
+                                                              0.0,
+                                                              12.0 / 5.0,
+                                                              -8.0,
+                                                              4015.0 / 612.0,
+                                                              -11.0 / 36.0,
+                                                              88.0 / 255.0,
+                                                              0.0,
+                                                              -8263.0 / 15000.0,
+                                                              124.0 / 75.0,
+                                                              -643.0 / 680.0,
+                                                              -81.0 / 250.0,
+                                                              2484.0 / 10625.0,
+                                                              0.0,
+                                                              0.0,
+                                                              3501.0 / 1720.0,
+                                                              -300.0 / 43.0,
+                                                              297275.0 / 52632.0,
+                                                              -319.0 / 2322.0,
+                                                              24068.0 / 84065.0,
+                                                              3850.0 / 26703.0,
+                                                              0.0}};
+  Kokkos::Array<double, nstages> b{
+      {3.0 / 4.0, 0.0, 875.0 / 2244.0, 23.0 / 72.0, 264.0 / 1955.0, 0.0, 125.0 / 11592.0, 43.0 / 616.0}};
+  Kokkos::Array<double, nstages> c{{0.0, 1.0 / 6.0, 4.0 / 15.0, 2.0 / 3.0, 5.0 / 6.0, 1.0, 1.0 / 15.0, 1.0}};
+  Kokkos::Array<double, nstages> e{{3.0 / 4.0 - 13.0 / 160.0, 0.0, 875.0 / 2244.0 - 2375.0 / 5984.0,
+                                    23.0 / 72.0 - 5.0 / 16.0, 264.0 / 1955.0 - 12.0 / 85.0, -3.0 / 44.0,
+                                    125.0 / 11592.0, 43.0 / 616.0}};
+};
+
 }  // namespace Impl
 }  // namespace KokkosODE
 

diff --git a/ode/impl/KokkosODE_RungeKutta_impl.hpp b/ode/impl/KokkosODE_RungeKutta_impl.hpp
@@ -23,19 +23,84 @@
 #include "KokkosODE_RungeKuttaTables_impl.hpp"
 #include "KokkosODE_Types.hpp"
 
+#include "iostream"
+
 namespace KokkosODE {
 namespace Impl {
 
+// This algorithm is mostly derived from
+// E. Hairer, S. P. Norsett G. Wanner,
+// "Solving Ordinary Differential Equations I:
+// Nonstiff Problems", Sec. II.4.
+// Note that all floating point values below
+// have been heuristically selected for
+// convergence performance.
+template <class ode_type, class mat_type, class vec_type, class res_type, class scalar_type>
+KOKKOS_FUNCTION void first_step_size(const ode_type ode, const int order, const scalar_type t0, const scalar_type atol,
+                                     const scalar_type rtol, const vec_type& y0, const res_type& f0, const vec_type y1,
+                                     const mat_type temp, scalar_type& dt_ini) {
+  using KAT = Kokkos::ArithTraits<scalar_type>;
+
+  // Extract subviews to store intermediate data
+  auto f1 = Kokkos::subview(temp, 1, Kokkos::ALL());
+
+  // Compute norms for y0 and f0
+  double n0 = KAT::zero(), n1 = KAT::zero(), dt0, scale;
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    scale = atol + rtol * Kokkos::abs(y0(eqIdx));
+    n0 += Kokkos::pow(y0(eqIdx) / scale, 2);
+    n1 += Kokkos::pow(f0(eqIdx) / scale, 2);
+  }
+  n0 = Kokkos::sqrt(n0) / Kokkos::sqrt(ode.neqs);
+  n1 = Kokkos::sqrt(n1) / Kokkos::sqrt(ode.neqs);
+
+  // Select dt0
+  if ((n0 < 1e-5) || (n1 < 1e-5)) {
+    dt0 = 1e-6;
+  } else {
+    dt0 = 0.01 * n0 / n1;
+  }
+
+  // Estimate y at t0 + dt0
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    y1(eqIdx) = y0(eqIdx) + dt0 * f0(eqIdx);
+  }
+
+  // Compute f at t0+dt0 and y1,
+  // then compute the norm of f(t0+dt0, y1) - f(t0, y0)
+  scalar_type n2 = KAT::zero();
+  ode.evaluate_function(t0 + dt0, dt0, y1, f1);
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    n2 += Kokkos::pow((f1(eqIdx) - f0(eqIdx)) / (atol + rtol * Kokkos::abs(y0(eqIdx))), 2);
+  }
+  n2 = Kokkos::sqrt(n2) / (dt0 * Kokkos::sqrt(ode.neqs));
+
+  // Finally select initial time step dt_ini
+  if ((n1 <= 1e-15) && (n2 <= 1e-15)) {
+    dt_ini = Kokkos::max(1e-6, dt0 * 1e-3);
+  } else {
+    dt_ini = Kokkos::pow(0.01 / Kokkos::max(n1, n2), KAT::one() / order);
+  }
+
+  dt_ini = Kokkos::min(100 * dt0, dt_ini);
+
+  // Zero out temp variables just to be safe...
+  for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
+    f0(eqIdx) = 0.0;
+    y1(eqIdx) = 0.0;
+    f1(eqIdx) = 0.0;
+  }
+}  // first_step_size
+
 // y_new = y_old + dt*sum(b_i*k_i)    i in [1, nstages]
 // k_i = f(t+c_i*dt, y_old+sum(a_{ij}*k_i))  j in [1, i-1]
 // we need to compute the k_i and store them as we go
 // to use them for k_{i+1} computation.
 template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
-KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool adaptivity, scalar_type t,
-                            scalar_type dt, const vec_type& y_old, const vec_type& y_new, const vec_type& temp,
-                            const mv_type& k_vecs) {
-  const int neqs    = ode.neqs;
-  const int nstages = table.nstages;
+KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, scalar_type t, scalar_type dt,
+                            const vec_type& y_old, const vec_type& y_new, const vec_type& temp, const mv_type& k_vecs) {
+  const int neqs        = ode.neqs;
+  constexpr int nstages = table_type::nstages;
 
   // first set y_new = y_old
   for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
@@ -72,34 +137,42 @@ KOKKOS_FUNCTION void RKStep(ode_type& ode, const table_type& table, const bool a
       y_new(eqIdx) += dt * table.b[stageIdx] * k(eqIdx);
     }
   }
-
-  // Compute estimation of the error using k_vecs and table.e
-  if (adaptivity == true) {
-    for (int eqIdx = 0; eqIdx < neqs; ++eqIdx) {
-      temp(eqIdx) = 0;
-      for (int stageIdx = 0; stageIdx < nstages; ++stageIdx) {
-        temp(eqIdx) += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
-      }
-    }
-  }
 }  // RKStep
 
+// Note that the control values for
+// time step increase/decrease are
+// heuristically chosen based on
+// L. F. Shampine and M. W. Reichelt
+// "The Matlab ODE suite" SIAM J. Sci.
+// Comput. Vol. 18, No. 1, pp. 1-22
+// Jan. 1997
 template <class ode_type, class table_type, class vec_type, class mv_type, class scalar_type>
 KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, const table_type& table,
                                                         const KokkosODE::Experimental::ODE_params& params,
                                                         const scalar_type t_start, const scalar_type t_end,
                                                         const vec_type& y0, const vec_type& y, const vec_type& temp,
-                                                        const mv_type& k_vecs) {
+                                                        const mv_type& k_vecs, int* const step_count) {
   constexpr scalar_type error_threshold = 1;
-  bool adapt                            = params.adaptivity;
+  scalar_type error_n;
+  bool adapt = params.adaptivity;
   bool dt_was_reduced;
-  if (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
+  if constexpr (std::is_same_v<table_type, ButcherTableau<0, 0>>) {
     adapt = false;
   }
 
   // Set current time and initial time step
-  scalar_type t_now = t_start;
-  scalar_type dt    = (t_end - t_start) / params.max_steps;
+  scalar_type t_now = t_start, dt = 0.0;
+  if (adapt == true) {
+    ode.evaluate_function(t_start, 0, y0, temp);
+    first_step_size(ode, table_type::order, t_start, params.abs_tol, params.rel_tol, y0, temp, y, k_vecs, dt);
+    if (dt < params.min_step_size) {
+      dt = params.min_step_size;
+    }
+  } else {
+    dt = (t_end - t_start) / params.num_steps;
+  }
+
+  *step_count = 0;
 
   // Loop over time steps to integrate ODE
   for (int stepIdx = 0; (stepIdx < params.max_steps) && (t_now <= t_end); ++stepIdx) {
@@ -119,28 +192,33 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con
     // Take tentative steps until the requested error
     // is met. This of course only works for adaptive
     // solvers, for fix time steps we simply do not
-    // compute and check what error of the current step
+    // compute and check the error of the current step
     while (error_threshold < error) {
       // Take a step of Runge-Kutta integrator
-      RKStep(ode, table, adapt, t_now, dt, y0, y, temp, k_vecs);
+      RKStep(ode, table, t_now, dt, y0, y, temp, k_vecs);
 
       // Compute the largest error and decide on
       // the size of the next time step to take.
       error = 0;
-      if (adapt) {
+
+      // Compute estimation of the error using k_vecs and table.e
+      if (adapt == true) {
         // Compute the error
         for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
-          error = Kokkos::max(error, Kokkos::abs(temp(eqIdx)));
-          tol   = Kokkos::max(
-              tol, params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx))));
+          tol     = params.abs_tol + params.rel_tol * Kokkos::max(Kokkos::abs(y(eqIdx)), Kokkos::abs(y0(eqIdx)));
+          error_n = 0;
+          for (int stageIdx = 0; stageIdx < table.nstages; ++stageIdx) {
+            error_n += dt * table.e[stageIdx] * k_vecs(stageIdx, eqIdx);
+          }
+          error += (error_n * error_n) / (tol * tol);
         }
-        error = error / tol;
+        error = Kokkos::sqrt(error / ode.neqs);
 
         // Reduce the time step if error
         // is too large and current step
         // is rejected.
         if (error > 1) {
-          dt             = dt * Kokkos::max(0.2, 0.8 / Kokkos::pow(error, 1 / table.order));
+          dt             = dt * Kokkos::max(0.2, 0.8 * Kokkos::pow(error, -1.0 / table.order));
           dt_was_reduced = true;
         }
 
@@ -150,14 +228,15 @@ KOKKOS_FUNCTION Experimental::ode_solver_status RKSolve(const ode_type& ode, con
 
     // Update time and initial condition for next time step
     t_now += dt;
+    *step_count += 1;
     for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
       y0(eqIdx) = y(eqIdx);
     }
 
     if (t_now < t_end) {
       if (adapt && !dt_was_reduced && error < 0.5) {
         // Compute new time increment
-        dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, 1 / table.order)));
+        dt = dt * Kokkos::min(10.0, Kokkos::max(2.0, 0.9 * Kokkos::pow(error, -1.0 / table.order)));
       }
     } else {
       return Experimental::ode_solver_status::SUCCESS;

diff --git a/ode/src/KokkosODE_BDF.hpp b/ode/src/KokkosODE_BDF.hpp
@@ -93,6 +93,7 @@ struct BDF {
 
     const double dt = (t_end - t_start) / num_steps;
     double t        = t_start;
+    int count       = 0;
 
     // Load y0 into y_vecs(:, 0)
     for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
@@ -107,7 +108,7 @@ struct BDF {
     }
     KokkosODE::Experimental::ODE_params params(table.order - 1);
     for (int stepIdx = 0; stepIdx < init_steps; ++stepIdx) {
-      KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack);
+      KokkosODE::Experimental::RungeKutta<RK_type::RKF45>::Solve(ode, params, t, t + dt, y0, y, update, kstack, &count);
 
       for (int eqIdx = 0; eqIdx < ode.neqs; ++eqIdx) {
         y_vecs(eqIdx, stepIdx + 1) = y(eqIdx);

diff --git a/ode/src/KokkosODE_RungeKutta.hpp b/ode/src/KokkosODE_RungeKutta.hpp
@@ -38,7 +38,8 @@ enum RK_type : int {
   RK4   = 4,  ///< Runge-Kutta classic order 4 method
   RKF45 = 5,  ///< Fehlberg order 5 method
   RKCK  = 6,  ///< Cash-Karp method
-  RKDP  = 7   ///< Dormand-Prince method
+  RKDP  = 7,  ///< Dormand-Prince method
+  VER56 = 8   ///< Verner order 6 method
 };
 
 template <RK_type T>
@@ -86,6 +87,11 @@ struct RK_Tableau_helper<RK_type::RKDP> {
   using table_type = KokkosODE::Impl::ButcherTableau<4, 6>;
 };
 
+template <>
+struct RK_Tableau_helper<RK_type::VER56> {
+  using table_type = KokkosODE::Impl::ButcherTableau<5, 7>;
+};
+
 /// \brief Unspecialized version of the RungeKutta solvers
 ///
 /// \tparam RK_type an RK_type enum value used to specify
@@ -128,9 +134,10 @@ struct RungeKutta {
   template <class ode_type, class vec_type, class mv_type, class scalar_type>
   KOKKOS_FUNCTION static ode_solver_status Solve(const ode_type& ode, const KokkosODE::Experimental::ODE_params& params,
                                                  const scalar_type t_start, const scalar_type t_end, const vec_type& y0,
-                                                 const vec_type& y, const vec_type& temp, const mv_type& k_vecs) {
+                                                 const vec_type& y, const vec_type& temp, const mv_type& k_vecs,
+                                                 int* const count) {
     table_type table;
-    return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs);
+    return KokkosODE::Impl::RKSolve(ode, table, params, t_start, t_end, y0, y, temp, k_vecs, count);
   }
 };
 

diff --git a/ode/src/KokkosODE_Types.hpp b/ode/src/KokkosODE_Types.hpp
@@ -27,12 +27,21 @@ struct ODE_params {
   int num_steps, max_steps;
   double abs_tol, rel_tol, min_step_size;
 
+  KOKKOS_FUNCTION
+  ODE_params()
+      : adaptivity(true), num_steps(100), max_steps(10000), abs_tol(1e-12), rel_tol(1e-6), min_step_size(1e-9) {}
+
   // Constructor that only specify the desired number of steps.
   // In this case no adaptivity is provided, the time step will
   // be constant such that dt = (tend - tstart) / num_steps;
   KOKKOS_FUNCTION
   ODE_params(const int num_steps_)
-      : adaptivity(false), num_steps(num_steps_), max_steps(num_steps_), abs_tol(0), rel_tol(0), min_step_size(0) {}
+      : adaptivity(false),
+        num_steps(num_steps_),
+        max_steps(num_steps_ + 1),
+        abs_tol(1e-12),
+        rel_tol(1e-6),
+        min_step_size(0) {}
 
   /// ODE_parms construtor for adaptive time stepping.
   KOKKOS_FUNCTION

diff --git a/ode/unit_test/Test_ODE.hpp b/ode/unit_test/Test_ODE.hpp
@@ -19,6 +19,7 @@
 // Explicit integrators
 #include "Test_ODE_RK.hpp"
 #include "Test_ODE_RK_chem.hpp"
+#include "Test_ODE_RK_counts.hpp"
 
 // Implicit integrators
 #include "Test_ODE_Newton.hpp"