Add align functions (#32)

moble · web-flow · commit c208506aa7b9 · 2021-10-07T19:59:27.000Z
* Add `align` functions

* Bump #minor

* Upload coverage in github actions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -32,6 +32,6 @@ jobs:
       - uses: julia-actions/julia-runtest@latest
       - uses: julia-actions/julia-processcoverage@latest
       - uses: codecov/codecov-action@v1
-        if: "matrix.version == 'nightly' && matrix.os == 'ubuntu-latest'"
+        if: "matrix.version == '1' && matrix.os == 'ubuntu-latest'"
         with:
           file: lcov.info
diff --git a/CITATION.cff b/CITATION.cff
@@ -6,6 +6,6 @@ authors:
     orcid: https://orcid.org/0000-0002-5075-5116
 title: "Quaternionic.jl"
 license: MIT
-version: v0.2.1
+version: v0.3.0
 doi: 10.5281/zenodo.5514302
 date-released: 2021-09-17
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Quaternionic"
 uuid = "0756cd96-85bf-4b6f-a009-b5012ea7a443"
 authors = ["Michael Boyle <michael.oliver.boyle@gmail.com>"]
-version = "0.2.3"
+version = "0.3.0"
 
 [deps]
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
diff --git a/docs/src/manual.md b/docs/src/manual.md
@@ -130,8 +130,8 @@ satisfy.  For any quaternions ``q_1`` and ``q_2`` and any *unit* quaternion
   * identity: ``d(q_1, q_1) = 0``
   * positive-definiteness: ``d(q_1, q_2) > 0`` whenever ``q_1 ≠ q_2``
 
-(Of course, it should be noted that these criteria all hold in the *exact*
-case; when using floating-point numbers, will likely be violated near edge
+(Of course, it should be noted that these criteria all hold in the *exact* case;
+when using floating-point numbers, they will likely be violated near edge
 cases.)
 
 It is not hard to see that these criteria can be satisfied by any of
@@ -145,15 +145,33 @@ If ``q_1`` and ``q_2`` are interpreted as rotations, we frequently don't care
 about their signs, and just want the *smallest* distance between them, for any
 choice of sign.  Furthermore, in the multiplicative case, the `log` functions
 will involve calculation of the `log` of the magnitudes of the quaternions,
-which should be 1.  In this case, we relax the "positive-definiteness"
-criterion to allow ``d(q_1, q_2)`` to equal zero when ``q_1`` and ``q_2`` are
-related by a nonzero scalar multiple.
+which should be 1.  In this case, we relax the "positive-definiteness" criterion
+to allow ``d(q_1, q_2)`` to equal zero when ``q_1`` and ``q_2`` are related by a
+nonzero scalar multiple.
 
-For `Rotor` types, the latter two multiplicative options are most relevant, while for
-other types the additive options are more relevant.  These are the default
-behaviors of the `distance` and `distance2` functions.
+For `Rotor` types, the latter two multiplicative options are most relevant,
+while for other types the additive options are more relevant.  These are the
+default behaviors of the `distance` and `distance2` functions.
 
 ```@autodocs
 Modules = [Quaternionic]
 Pages   = ["distance.jl"]
 ```
+
+
+## Alignment
+
+There are many ways to optimize alignment with rotations.  In particular, we can
+seek the optimal rotation that takes one set of points onto a corresponding set
+of points, or the optimal quaternion that takes one set of quaternions onto a
+corresponding set of quaternions.  In both cases, the "optimal" value depends on
+the metric being used.  The simplest and most robust results are obtained when
+the metric is the standard Euclidean metric (in the case of points), or the
+magnitude of the difference (in the case of quaternions).  Here, we assume that
+`QuatVec`s represent points, and that any other type of quaternion represents a
+rotor.
+
+```@autodocs
+Modules = [Quaternionic]
+Pages   = ["alignment.jl"]
+```
diff --git a/src/Quaternionic.jl b/src/Quaternionic.jl
@@ -16,6 +16,7 @@ export from_float_array, to_float_array,
     from_spherical_coordinates, to_spherical_coordinates,
     from_rotation_matrix, to_rotation_matrix
 export distance, distance2
+export align
 export unflip, unflip!, slerp, squad
 export ∂log, log∂log, ∂exp, exp∂exp, slerp∂slerp, slerp∂slerp∂τ, squad∂squad∂t
 export precessing_nutating_example
@@ -30,6 +31,7 @@ include("math.jl")
 include("random.jl")
 include("conversion.jl")
 include("distance.jl")
+include("alignment.jl")
 include("interpolation.jl")
 include("gradients.jl")
 include("examples.jl")
diff --git a/src/alignment.jl b/src/alignment.jl
@@ -0,0 +1,141 @@
+@doc raw"""
+    align(a⃗, b⃗, [w])
+
+Solve [Wahba's problem](https://en.wikipedia.org/wiki/Wahba%27s_problem),
+finding a rotation that aligns the set of points `a⃗` to a corresponding set of
+points `b⃗` by minimizing the distance between the first set and the rotated
+second set.
+
+Here, `a⃗` and `b⃗` must be equally sized arrays of `QuatVec`s.  If present, `w`
+must be an equally sized array of real numbers; if not, it is taken to be 1.
+We define the loss function
+```math
+L(ℛ) ≔ Σᵢ wᵢ ‖a⃗ᵢ - ℛ b⃗ᵢ‖²
+```
+where ``ℛ`` is a rotation operator, and return the quaternion corresponding to
+the optimal ``ℛ`` that minimizes this function.
+
+Note that it is possible that the points do not uniquely determine a rotation —
+as when one or both sets of points is rotationally symmetric.  In that case, the
+loss function ``L(ℛ)`` will still be minimized and the points will still be
+optimally aligned by the output quaternion, but that quaternion will not be
+unique.
+
+
+# Notes
+
+In their book [_Fundamentals of Spacecraft Attitude Determination and Control_
+(2014)](https://doi.org/10.1007/978-1-4939-0802-8), Markley and Crassidis say
+that "Davenport’s method remains the best method for solving Wahba’s problem".
+This method provides the optimal quaternion as the dominant eigenvector (the one
+with the largest eigenvalue) of a certain matrix.  We start by defining the
+supplementary matrix
+```math
+S ≔ Σᵢ wᵢ a⃗ᵢ b⃗ᵢᵀ
+```
+and vector
+```math
+s⃗ ≔ \begin{bmatrix}
+S₂₃-S₃₂ \\
+S₃₁-S₁₃ \\
+S₁₂-S₂₁
+\end{bmatrix}.
+```
+Then the key matrix is
+```math
+M ≔ \begin{bmatrix}
+S + Sᵀ - (\mathrm{tr}S)\, I₃ & s⃗ \\
+s⃗ᵀ & \mathrm{tr}S
+\end{bmatrix}
+```
+It is possible for this matrix to have degenerate eigenvalues, corresponding to
+cases where the points do not uniquely determine the rotation, as described
+above.
+
+"""
+function align(a⃗::AbstractArray{<:QuatVec}, b⃗::AbstractArray{<:QuatVec}, w::AbstractArray{<:Real})
+    # Weighted sum of the products wᵢ a⃗ᵢ b⃗ᵢᵀ; this is Eq. (5.11) from Markley and Crassidis
+    weighted_outer(k) = w[k] * a⃗[k].vec * b⃗[k].vec'
+    return _align_Wahba(sum(weighted_outer(k) for k in eachindex(a⃗, b⃗, w)))
+end
+
+function align(a⃗::AbstractArray{<:QuatVec}, b⃗::AbstractArray{<:QuatVec})
+    # Unweighted sum of the products a⃗ᵢ b⃗ᵢᵀ; this is Eq. (5.11) from Markley and Crassidis
+    outer(k) = a⃗[k].vec * b⃗[k].vec'
+    return _align_Wahba(sum(outer(k) for k in eachindex(a⃗, b⃗)))
+end
+
+function _align_Wahba(S)
+    # Eq. (5.17) from Markley and Crassidis, adapted to our conventions: the sign of ``z``
+    # is flipped, and the final dimension is moved to the first dimension.
+    # Note that `Symmetric` (by default) reads only the upper triangle of the wrapped array.
+    M = Symmetric([
+        S[1,1]+S[2,2]+S[3,3]  S[3,2]-S[2,3]         S[1,3]-S[3,1]          S[2,1]-S[1,2]
+        S[3,2]-S[2,3]         S[1,1]-S[2,2]-S[3,3]  S[1,2]+S[2,1]          S[1,3]+S[3,1]
+        S[1,3]-S[3,1]         S[1,2]+S[2,1]         -S[1,1]+S[2,2]-S[3,3]  S[2,3]+S[3,2]
+        S[2,1]-S[1,2]         S[1,3]+S[3,1]         S[2,3]+S[3,2]          -S[1,1]-S[2,2]+S[3,3]
+    ])
+    # Request only the 4th (last) eigenpair from `eigen`; it has the largest eigenvalue, so
+    # its eigenvector is the dominant one, which is interpreted as a Rotor.
+    dominant = vec(eigen(M, 4:4).vectors)
+    return Rotor(dominant...)
+end
+
+
+@doc raw"""
+    align(A, B, [w])
+
+Find a `Rotor` that aligns the set of rotors `A` to a corresponding set `B` by
+minimizing the distance between the first set and the rotated second set.
+
+Here, `A` and `B` must be equally sized arrays of `AbstractQuaternion`s.  If
+present, `w` must be an equally sized array of real numbers; if not, it is
+taken to be 1.  We define the loss function
+```math
+L(R) ≔ Σᵢ wᵢ |Aᵢ - R Bᵢ|²
+```
+where ``R`` is a `Rotor`, and return the quaternion corresponding to the optimal
+``R`` that minimizes this function.
+
+Note that it is possible that the input data do not uniquely determine a rotor,
+which will happen when the sum below is zero.  When this happens, the result will
+contain `NaN`s, but no error will be raised.  When the sum is very close to —
+but not exactly — zero, the accuracy of the result will be limited.  However,
+the loss function will not depend strongly on the result in that case.
+
+Be aware that this function _is_ sensitive to the signs of the input
+quaternions.  See the [`unflip`](@ref) function for one way to avoid problems
+related to signs.
+
+
+## Notes
+
+We can ensure that the loss function is minimized by multiplying ``R`` by an
+exponential, differentiating with respect to the argument of the exponential,
+and setting that argument to 0.  This derivative should be 0 at the minimum.  We
+have
+```math
+∂ⱼ Σᵢ wᵢ |Aᵢ - \exp[vⱼ] R Bᵢ|²  →  2 ⟨ eⱼ R Σᵢ wᵢ Bᵢ Āᵢ ⟩₀
+```
+where → denotes taking ``vⱼ→0``, the symbol ``⟨⟩₀`` denotes taking the scalar
+part, and ``eⱼ`` is the unit quaternionic vector in the ``j`` direction.  The
+only way for this quantity to be zero for each choice of ``j`` is if
+```math
+R Σᵢ wᵢ Bᵢ Āᵢ
+```
+is itself a pure scalar.  This, in turn, can only happen if either (1) the sum
+is 0 or (2) ``R`` is proportional to the _conjugate_ of the sum:
+```math
+R ∝ Σᵢ wᵢ Aᵢ B̄ᵢ
+```
+Now, since we want ``R`` to be a rotor, we simply define it to be the normalized
+sum.
+
+"""
+function align(A, B, w)
+    return Rotor(sum(w[k] * A[k] * conj(B[k]) for k in eachindex(A, B, w)))
+end
+
+function align(A, B)
+    return Rotor(sum(A[k] * conj(B[k]) for k in eachindex(A, B)))
+end
diff --git a/test/alignment.jl b/test/alignment.jl
@@ -0,0 +1,100 @@
+@testset verbose=true "Alignment" begin
+    Random.seed!(1234)
+    @testset verbose=true "Align QuatVec{$T}" for T in [Float16, Float32, Float64]
+        for N in [1, 2, 3, 4, 5, 10, 20]
+            a⃗ = randn(QuatVec{T}, N)
+            R = randn(Rotor{T})
+
+            # Exact case: b⃗ is a⃗ rotated by R, so conj(align(a⃗, b⃗)) should match R to roundoff
+            b⃗ = R .* a⃗ .* conj(R)
+            R′ = align(a⃗, b⃗)
+            if N > 1
+                @test distance(R, conj(R′)) < 25eps(T)
+            end
+            @test maximum(abs, a⃗ - R′ .* b⃗ .* conj(R′)) < 40eps(T)
+            @test_throws DimensionMismatch align(a⃗, b⃗[2:end])
+
+            # Uniform weights (every entry 17/3) should pass the same checks as no weights
+            w = 17ones(T, size(a⃗)) / T(3)
+            R′′ = align(a⃗, b⃗, w)
+            if N > 1
+                @test distance(R, conj(R′′)) < 25eps(T)
+            end
+            @test maximum(abs, a⃗ - R′′ .* b⃗ .* conj(R′′)) < 40eps(T)
+            @test_throws DimensionMismatch align(a⃗, b⃗, w[2:end])
+
+            # Perturb each component of b⃗ by at most δ/√3; tolerances become multiples of δ
+            δ = √eps(T)
+            b⃗′′′ = [b⃗i + QuatVec((2*(rand(T, 3) .- 1/T(2)) * δ/√T(3))...) for b⃗i in b⃗]
+            R′′′ = align(a⃗, b⃗′′′)
+            if N > 1
+                @test distance(R, conj(R′′′)) < 25δ
+            end
+            @test maximum(abs, a⃗ - R′′′ .* b⃗′′′ .* conj(R′′′)) < 40δ
+
+            # Replace the first third of b⃗ with random points, but zero their weights to ignore
+            if N > 3
+                N′ = N ÷ 3
+                b⃗′ = copy(b⃗)
+                w′ = copy(w)
+                b⃗′[1:N′] = randn(QuatVec{T}, N′)
+                w′[1:N′] .= 0
+                R1 = align(a⃗, b⃗, w′)
+                R2 = align(a⃗, b⃗′, w′)
+                @test distance(R, conj(R1)) < 25eps(T)
+                @test distance(R, conj(R2)) < 25eps(T)
+                @test maximum(abs, (a⃗ - R1 .* b⃗ .* conj(R1))[N′+1:end]) < 40eps(T)
+                @test maximum(abs, (a⃗ - R2 .* b⃗ .* conj(R2))[N′+1:end]) < 40eps(T)
+            end
+        end
+    end
+
+    @testset verbose=true "Align Rotor{$T}" for T in [Float16, Float32, Float64]
+        for N in [1, 2, 3, 4, 5, 10, 20]
+            A = randn(Rotor{T}, N)
+            R = randn(Rotor{T})
+
+            # Exact case: B is R times A, so conj(align(A, B)) should match R to roundoff
+            B = R .* A
+            R′ = align(A, B)
+            if N > 1
+                @test distance(R, conj(R′)) < 25eps(T)
+            end
+            @test maximum(abs, A - R′ .* B) < 40eps(T)
+            @test_throws DimensionMismatch align(A, B[2:end])
+
+            # Uniform weights (every entry 17/3) should pass the same checks as no weights
+            w = 17ones(T, size(A)) / T(3)
+            R′′ = align(A, B, w)
+            if N > 1
+                @test distance(R, conj(R′′)) < 25eps(T)
+            end
+            @test maximum(abs, A - R′′ .* B) < 40eps(T)
+            @test_throws DimensionMismatch align(A, B, w[2:end])
+
+            # Perturb each component of B by at most δ/√3 (re-wrapped as `Rotor`); tolerances ∝ δ
+            δ = √eps(T)
+            B′′′ = [Rotor(Bi + Quaternion((2*(rand(T, 4) .- 1/T(2)) * δ/√T(3))...)) for Bi in B]
+            R′′′ = align(A, B′′′)
+            if N > 1
+                @test distance(R, conj(R′′′)) < 25δ
+            end
+            @test maximum(abs, A - R′′′ .* B′′′) < 40δ
+
+            # Replace the first third of B with random rotors, but zero their weights to ignore
+            if N > 3
+                N′ = N ÷ 3
+                B′ = copy(B)
+                w′ = copy(w)
+                B′[1:N′] = randn(Rotor{T}, N′)
+                w′[1:N′] .= 0
+                R1 = align(A, B, w′)
+                R2 = align(A, B′, w′)
+                @test distance(R, conj(R1)) < 25eps(T)
+                @test distance(R, conj(R2)) < 25eps(T)
+                @test maximum(abs, (A - R1 .* B)[N′+1:end]) < 40eps(T)
+                @test maximum(abs, (A - R2 .* B)[N′+1:end]) < 40eps(T)
+            end
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -67,6 +67,7 @@ end
     addtests("random.jl")
     addtests("conversion.jl")
     addtests("distance.jl")
+    addtests("alignment.jl")
     addtests("interpolation.jl")
     addtests("gradients.jl")
     addtests("doctests.jl")