From 4542a699db09f54f8fbd9a47dce2ef85638ec24e Mon Sep 17 00:00:00 2001
From: Lars Tingelstad <lars.tingelstad@ntnu.no>
Date: Mon, 15 Aug 2016 17:05:36 +0200
Subject: [PATCH] Added hand-optimized benchmark

---
 CMakeLists.txt                        |  1 +
 game                                  | 86 +++++++++++++++------------
 src/benchmark.cpp                     | 61 +++++++++++++++++++
 src/motor_estimation.cpp              | 60 +++++++++++++++++++
 src/vsr/versor_dual_line_pybind11.cpp |  2 +
 5 files changed, 171 insertions(+), 39 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 008d6dd..4e97b17 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,7 @@ add_executable(benchmark
   )
 target_link_libraries(benchmark
   adept
+  pthread
   ${CERES_LIBRARIES}
   ${BENCHMARK_LIBRARY}
   )
diff --git a/game b/game
index 002202b..27162b1 100755
--- a/game
+++ b/game
@@ -12,19 +12,23 @@ import getpass
 import hashlib
 import random
 
+
 # From: https://github.com/jupyter/notebook/blob/master/notebook/auth/security.py
 def no_code(x, encoding=None):
     return x
 
+
 def encode(u, encoding=None):
     encoding = encoding or DEFAULT_ENCODING
     return u.encode(encoding, "replace")
 
+
 def cast_bytes(s, encoding=None):
     if not isinstance(s, bytes):
         return encode(s, encoding)
     return s
 
+
 if sys.version_info[0] >= 3:
     str_to_bytes = encode
 else:
@@ -33,6 +37,8 @@ else:
 # Length of the salt in nr of hex chars, which implies salt_len * 4
 # bits of randomness.
 salt_len = 12
+
+
 def passwd(passphrase=None, algorithm='sha1'):
     """Generate hashed password and salt for use in notebook configuration.
     In the notebook configuration, set `c.NotebookApp.password` to
@@ -72,21 +78,34 @@ def passwd(passphrase=None, algorithm='sha1'):
 
     return ':'.join((algorithm, salt, h.hexdigest()))
 
+
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Geometric Algebra Multivector Estimation", prog='game')
+    parser = argparse.ArgumentParser(
+        description="Geometric Algebra Multivector Estimation",
+        prog='game')
     subparser = parser.add_subparsers(help='commands', dest='verb')
     run_parser = subparser.add_parser('run')
-    run_parser.add_argument('--use-password', default=True, help='Password protect notebook',
+    run_parser.add_argument('--use-password',
+                            default=True,
+                            help='Password protect notebook',
                             dest='usepassword')
-    run_parser.add_argument('--hostname', default="docker-game", help='Container host name')
+    run_parser.add_argument('--hostname',
+                            default="docker-game",
+                            help='Container host name')
+    run_parser.add_argument(
+        '--tag',
+        default='tingelst/game',
+        help='Name and optionally a tag in the "name:tag" format')
     build_parser = subparser.add_parser('build')
-    build_parser.add_argument('--tag',
-                              default='tingelst/game',
-                              help='Name and optionally a tag in the "name:tag" format')
+    build_parser.add_argument(
+        '--tag',
+        default='tingelst/game',
+        help='Name and optionally a tag in the "name:tag" format')
     build_container_parser = subparser.add_parser('build-container')
-    build_container_parser.add_argument('--tag',
-                              default='tingelst/game',
-                              help='Name and optionally a tag in the "name:tag" format')
+    build_container_parser.add_argument(
+        '--tag',
+        default='tingelst/game',
+        help='Name and optionally a tag in the "name:tag" format')
 
     if len(sys.argv) == 1:
         parser.print_help()
@@ -100,33 +119,27 @@ if __name__ == '__main__':
         if not os.path.isdir('build'):
             try:
                 os.makedirs('build')
-            except OSError as exc: # Python >2.5
+            except OSError as exc:  # Python >2.5
                 if exc.errno == errno.EEXIST and os.path.isdir('build'):
                     pass
-                else: raise
-            with subprocess.Popen(['docker', 'run',
-                                   '--volume',
-                                   '{pwd}:/home/game/game'.format(pwd=os.getcwd()),
-                                   '--workdir',
-                                   '/home/game/game/build',
-                                   args.tag,
-                                   'zsh',
-                                   '-c',
-                                   'source /home/game/python/bin/activate && cmake -GNinja ..']) as p:
+                else:
+                    raise
+            with subprocess.Popen(
+                ['docker', 'run', '--volume',
+                 '{pwd}:/home/game/game'.format(pwd=os.getcwd()), '--workdir',
+                 '/home/game/game/build', args.tag, 'zsh', '-c',
+                 'source /home/game/python/bin/activate && cmake -GNinja ..'
+                 ]) as p:
                 try:
                     p.wait()
                 except KeyboardInterrupt:
                     p.terminate()
 
-        with subprocess.Popen(['docker', 'run',
-                               '--volume',
-                               '{pwd}:/home/game/game'.format(pwd=os.getcwd()),
-                               '--workdir',
-                               '/home/game/game/build',
-                               args.tag,
-                               'zsh',
-                               '-c',
-                               'source /home/game/python/bin/activate && cmake --build .']) as p:
+        with subprocess.Popen(
+            ['docker', 'run', '--volume',
+             '{pwd}:/home/game/game'.format(pwd=os.getcwd()), '--workdir',
+             '/home/game/game/build', args.tag, 'zsh', '-c',
+             'source /home/game/python/bin/activate && cmake --build .']) as p:
             try:
                 p.wait()
             except KeyboardInterrupt:
@@ -135,17 +148,12 @@ if __name__ == '__main__':
     elif args.verb == 'run':
         if args.usepassword:
             h = passwd()
-        with subprocess.Popen(['docker', 'run',
-                               '-h', args.hostname,
-                               '-it',
-                               '--rm',
-                               '-v', '{pwd}:/home/game/game'.format(pwd=os.getcwd()),
-                               '-e', "USE_HTTPS=yes",
-                               '-e', "PASSWORD={}".format(h),
-                               '-p', '8888:8888',
-                               'tingelst/game']) as p:
+        with subprocess.Popen(
+            ['docker', 'run', '-h', args.hostname, '-it', '--rm', '-v',
+             '{pwd}:/home/game/game'.format(pwd=os.getcwd()), '-e',
+             "USE_HTTPS=yes", '-e', "PASSWORD={}".format(h), '-p', '8888:8888',
+             args.tag]) as p:
             try:
                 p.wait()
             except KeyboardInterrupt:
                 p.terminate()
-
diff --git a/src/benchmark.cpp b/src/benchmark.cpp
index 111361e..718bf2f 100644
--- a/src/benchmark.cpp
+++ b/src/benchmark.cpp
@@ -465,6 +465,21 @@ static void BM_AdeptRotorMatrixJacobianReverse(benchmark::State &state) {
     g_stack.jacobian_reverse(jac, true);
   }
 }
+
+struct DiffRotorHandFunctor {
+  template <typename T> bool operator()(const T *th, const T *a, T *b) const {
+    T st = sin(th[0] / 2.0);
+    T ct = cos(th[0] / 2.0);
+    T stst = st * st;
+    T ctct = ct * ct;
+    T ctst = ct * st;
+    b[0] = (-(a[0] * stst)) - 2.0 * a[1] * ctst + a[0] * ctct; // e1
+    b[1] = (-(a[1] * stst)) + 2.0 * a[0] * ctst + a[1] * ctct; // e2
+    b[2] = a[2] * stst + a[2] * ctct;                          // e3
+    return true;
+  }
+};
+
 struct DiffRotorGaalopFunctor {
   template <typename T> bool operator()(const T *th, const T *a, T *b) const {
     b[0] = (-(a[0] * sin(th[0] / 2.0) * sin(th[0] / 2.0))) -
@@ -509,6 +524,49 @@ static void BM_AdeptRotorGaalopJacobianForward(benchmark::State &state) {
   }
 }
 
+static void BM_AdeptRotorHandJacobianReverse(benchmark::State &state) {
+  double jac[3];
+  adept::adouble theta{0.5};
+  adept::adouble point[3];
+  adept::set_values(point, 3, g_point);
+  while (state.KeepRunning()) {
+    g_stack.new_recording();
+    adept::adouble res[3] = {0.0, 0.0, 0.0};
+    DiffRotorHandFunctor()(&theta, &point[0], &res[0]);
+    g_stack.independent(&theta, 1);
+    g_stack.dependent(res, 3);
+    g_stack.jacobian_reverse(jac, true);
+  }
+}
+
+static void BM_AdeptRotorHandJacobianForward(benchmark::State &state) {
+  double jac[3];
+  adept::adouble theta{0.5};
+  adept::adouble point[3];
+  adept::set_values(point, 3, g_point);
+  while (state.KeepRunning()) {
+    g_stack.new_recording();
+    adept::adouble res[3] = {0.0, 0.0, 0.0};
+    DiffRotorHandFunctor()(&theta, &point[0], &res[0]);
+    g_stack.set_max_jacobian_threads(3);
+    g_stack.independent(&theta, 1);
+    g_stack.dependent(res, 3);
+    // g_stack.jacobian_forward_openmp(jac, true);
+    g_stack.jacobian_forward(jac, true);
+  }
+}
+
+static void BM_CeresRotorHandJacobian(benchmark::State &state) {
+  double theta = 0.5;
+  double jac[3];
+  const double *parameters[2] = {&theta, &g_point[0]};
+  double *jacobians[2] = {jac, nullptr};
+  while (state.KeepRunning()) {
+    ceres::AutoDiffCostFunction<DiffRotorHandFunctor, 3, 1, 3>(
+        new DiffRotorHandFunctor())
+        .Evaluate(parameters, &g_point_spin_motor[0], jacobians);
+  }
+}
 static void BM_CeresRotorGaalopJacobian(benchmark::State &state) {
   double theta = 0.5;
   double jac[3];
@@ -541,6 +599,7 @@ BENCHMARK(BM_CeresRotorMatrixJacobian);
 BENCHMARK(BM_CeresRotorVersorJacobian);
 BENCHMARK(BM_CeresRotorHepGAJacobian);
 BENCHMARK(BM_CeresRotorGaalopJacobian);
+BENCHMARK(BM_CeresRotorHandJacobian);
 BENCHMARK(BM_AdeptRotorMatrixJacobianForward);
 BENCHMARK(BM_AdeptRotorMatrixJacobianReverse);
 BENCHMARK(BM_AdeptRotorVersorJacobianForward);
@@ -549,5 +608,7 @@ BENCHMARK(BM_AdeptRotorHepGAJacobianForward);
 BENCHMARK(BM_AdeptRotorHepGAJacobianReverse);
 BENCHMARK(BM_AdeptRotorGaalopJacobianReverse);
 BENCHMARK(BM_AdeptRotorGaalopJacobianForward);
+BENCHMARK(BM_AdeptRotorHandJacobianReverse);
+BENCHMARK(BM_AdeptRotorHandJacobianForward);
 
 BENCHMARK_MAIN()
diff --git a/src/motor_estimation.cpp b/src/motor_estimation.cpp
index e13966b..b41584e 100644
--- a/src/motor_estimation.cpp
+++ b/src/motor_estimation.cpp
@@ -253,6 +253,54 @@ class MotorEstimationSolver {
     const Pnt b_;
   };
 
+  struct GaalopPointCorrespondencesCostFunctor {
+    GaalopPointCorrespondencesCostFunctor(const Pnt &a, const Pnt &b)
+        : a_(a), b_(b) {}
+
+    template <typename T>
+    void Calculate(const T &a1, const T &a2, const T &a3, const T &a4,
+                   const T &a5, const T &b1, const T &b2, const T &b3,
+                   const T &b4, const T &b5, const T &m1, const T &m2,
+                   const T &m3, const T &m4, const T &m5, const T &m6,
+                   const T &m7, const T &m8, T *residual) const {
+
+      residual[0] = ((((-(2.0 * a4 * m4 * m8)) - 2.0 * a4 * m3 * m7 -
+                       2.0 * a4 * m2 * m6 - 2.0 * a4 * m1 * m5 + a1 * m4 * m4 +
+                       (2.0 * a3 * m2 - 2.0 * a2 * m3) * m4) -
+                      a1 * m3 * m3 + 2.0 * a3 * m1 * m3) -
+                     a1 * m2 * m2 + 2.0 * a2 * m1 * m2 + a1 * m1 * m1) -
+                    b1; // e1
+      residual[1] = (((2.0 * a4 * m3 * m8 - 2.0 * a4 * m4 * m7 -
+                       2.0 * a4 * m1 * m6 + 2.0 * a4 * m2 * m5) -
+                      a2 * m4 * m4 + (2.0 * a3 * m1 - 2.0 * a1 * m3) * m4 +
+                      a2 * m3 * m3) -
+                     2.0 * a3 * m2 * m3 - a2 * m2 * m2 - 2.0 * a1 * m1 * m2 +
+                     a2 * m1 * m1) -
+                    b2; // e2
+      residual[2] = ((((-(2.0 * a4 * m2 * m8)) - 2.0 * a4 * m1 * m7 +
+                       2.0 * a4 * m4 * m6 + 2.0 * a4 * m3 * m5) -
+                      a3 * m4 * m4 + (2.0 * a1 * m2 - 2.0 * a2 * m1) * m4) -
+                     a3 * m3 * m3 + ((-(2.0 * a1 * m1)) - 2.0 * a2 * m2) * m3 +
+                     a3 * m2 * m2 + a3 * m1 * m1) -
+                    b3; // e3
+    }
+
+    template <typename T>
+    auto operator()(const T *const motor, T *residual) const -> bool {
+
+      Calculate(T(a_[0]), T(a_[1]), T(a_[2]), T(a_[3]), T(a_[4]), T(b_[0]),
+                T(b_[1]), T(b_[2]), T(b_[3]), T(b_[4]), T(motor[0]),
+                T(motor[1]), T(motor[2]), T(motor[3]), T(motor[4]), T(motor[5]),
+                T(motor[6]), T(motor[7]), residual);
+
+      return true;
+    }
+
+  private:
+    const Pnt a_;
+    const Pnt b_;
+  };
+
   struct PointCorrespondencesCostFunctor {
     PointCorrespondencesCostFunctor(const Pnt &a, const Pnt &b)
         : a_(a), b_(b) {}
@@ -330,6 +378,16 @@ class MotorEstimationSolver {
     return true;
   }
 
+  auto AddGaalopPointCorrespondencesResidualBlock(const Pnt &a, const Pnt &b)
+      -> bool {
+    ceres::CostFunction *cost_function =
+        new ceres::AutoDiffCostFunction<GaalopPointCorrespondencesCostFunctor,
+                                        3, 8>(
+            new GaalopPointCorrespondencesCostFunctor(a, b));
+    problem_.AddResidualBlock(cost_function, NULL, &motor_[0]);
+    return true;
+  }
+
   auto AddAdeptPointCorrespondencesResidualBlock(const Pnt &a, const Pnt &b)
       -> bool {
     ceres::CostFunction *cost_function =
@@ -426,6 +484,8 @@ PYBIND11_PLUGIN(motor_estimation) {
            &MotorEstimationSolver::AddLineAngleDistanceNormResidualBlock)
       .def("add_point_correspondences_residual_block",
            &MotorEstimationSolver::AddPointCorrespondencesResidualBlock)
+      .def("add_gaalop_point_correspondences_residual_block",
+           &MotorEstimationSolver::AddGaalopPointCorrespondencesResidualBlock)
       .def("add_adept_point_correspondences_residual_block",
            &MotorEstimationSolver::AddAdeptPointCorrespondencesResidualBlock)
       .def("add_point_distance_residual_block",
diff --git a/src/vsr/versor_dual_line_pybind11.cpp b/src/vsr/versor_dual_line_pybind11.cpp
index ee56add..c961055 100644
--- a/src/vsr/versor_dual_line_pybind11.cpp
+++ b/src/vsr/versor_dual_line_pybind11.cpp
@@ -68,6 +68,8 @@ void AddDualLine(py::module &m) {
       .def("__sub__", [](const Dll &lhs, const Dll &rhs) { return lhs - rhs; })
       .def("__mul__", [](const Dll &lhs, const Mot &rhs) { return lhs * rhs; })
       .def("__mul__", [](const Dll &lhs, const Dll &rhs) { return lhs * rhs; })
+      .def("__le__",
+           [](const Dll &lhs, const Dll &rhs) { return (lhs <= rhs)[0]; })
       .def("__mul__", [](const Dll &lhs, double rhs) { return lhs * rhs; })
       .def("__div__", [](const Dll &lhs, double rhs) { return lhs / rhs; })
       .def("__repr__",