From a11a575676b72e075d9f1930e1592b1d2a46cc5c Mon Sep 17 00:00:00 2001
From: Jonathan Doucette <jdoucette@physics.ubc.ca>
Date: Thu, 4 Apr 2024 17:30:28 -0700
Subject: [PATCH] more nnls updates; more tests, minor speedup

---
 src/NNLS.jl      | 247 ++++++++++++++++++++++++++++++++++-------------
 src/lsqnonneg.jl |  18 +++-
 test/nnls.jl     |  50 +++++++---
 3 files changed, 229 insertions(+), 86 deletions(-)

diff --git a/src/NNLS.jl b/src/NNLS.jl
index 7421536..a6d329c 100644
--- a/src/NNLS.jl
+++ b/src/NNLS.jl
@@ -82,6 +82,20 @@ function NNLSWorkspace(::Type{T}, m::Int, n::Int) where {T}
     )
 end
 
+function Base.copy(w::NNLSWorkspace)
+    return NNLSWorkspace(
+        copy(w.A),
+        copy(w.b),
+        copy(w.x),
+        copy(w.w),
+        copy(w.zz),
+        copy(w.idx),
+        Ref(w.rnorm[]),
+        Ref(w.mode[]),
+        Ref(w.nsetp[]),
+    )
+end
+
 function NNLSWorkspace(A::AbstractMatrix{T}, b::AbstractVector{T}) where {T}
     m, n = size(A)
     @assert size(b) == (m,)
@@ -104,6 +118,9 @@ function checkargs(work::NNLSWorkspace)
     @assert size(work.w) == (n,)
     @assert size(work.zz) == (m,)
     @assert size(work.idx) == (n,)
+    @assert 0 <= work.rnorm[]
+    @assert 0 <= work.mode[]
+    @assert 0 <= work.nsetp[] <= min(m, n)
 end
 
 struct NormalEquationCholesky{T, W <: NNLSWorkspace{T}} <: Factorization{T}
@@ -148,9 +165,9 @@ References:
     - Lawson, C.L. and R.J. Hanson, Solving Least-Squares Problems
     - Prentice-Hall, Chapter 23, p. 161, 1974
 """
-function nnls(A::AbstractMatrix{T}, b::AbstractVector{T}; kwargs...) where {T}
+function nnls(A::AbstractMatrix{T}, b::AbstractVector{T}, args...; kwargs...) where {T}
     work = NNLSWorkspace(A, b)
-    return nnls!(work, A, b; kwargs...)
+    return nnls!(work, A, b, args...; kwargs...)
 end
 
 function nnls!(
@@ -168,7 +185,7 @@ end
 
 """
 CONSTRUCTION AND/OR APPLICATION OF A SINGLE
-HOUSEHOLDER TRANSFORMATION..     Q = I + U*(U**T)/B
+HOUSEHOLDER TRANSFORMATION Q = I + U*(U**T)/B
 
 The original version of this code was developed by
 Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
@@ -176,7 +193,10 @@ Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
 "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974.
 Revised FEB 1995 to accompany reprinting of the book by SIAM.
 """
-function construct_householder!(u::AbstractVector{T}, up::T) where {T}
+function construct_householder!(
+    u::AbstractVector{T},
+    up::T,
+) where {T}
     if length(u) <= 1
         return up
     end
@@ -198,7 +218,7 @@ end
 
 """
 CONSTRUCTION AND/OR APPLICATION OF A SINGLE
-HOUSEHOLDER TRANSFORMATION..     Q = I + U*(U**T)/B
+HOUSEHOLDER TRANSFORMATION Q = I + U*(U**T)/B
 
 The original version of this code was developed by
 Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
@@ -206,7 +226,11 @@ Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
 "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974.
 Revised FEB 1995 to accompany reprinting of the book by SIAM.
 """
-function apply_householder!(u::AbstractVector{T}, up::T, c::AbstractVector{T}) where {T}
+function apply_householder!(
+    u::AbstractVector{T},
+    up::T,
+    c::AbstractVector{T},
+) where {T}
     m = length(u)
     if m <= 1
         return nothing
@@ -234,7 +258,13 @@ function apply_householder!(u::AbstractVector{T}, up::T, c::AbstractVector{T}) w
     end
 end
 
-function apply_householder!(A::AbstractMatrix{T}, up::T, idx::AbstractVector{Int}, nsetp::Int, jup::Int) where {T}
+function apply_householder!(
+    A::AbstractMatrix{T},
+    up::T,
+    idx::AbstractVector{Int},
+    nsetp::Int,
+    jup::Int,
+) where {T}
     m, n = size(A)
     if m - nsetp <= 0
         return nothing
@@ -287,39 +317,136 @@ function apply_householder!(A::AbstractMatrix{T}, up::T, idx::AbstractVector{Int
     return nothing
 end
 
+function apply_householder_dual!(
+    A::AbstractMatrix{T},
+    w::AbstractVector{T},
+    b::AbstractVector{T},
+    up::T,
+    idx::AbstractVector{Int},
+    nsetp::Int,
+    jup::Int,
+) where {T}
+    m, n = size(A)
+    if m - nsetp <= 0
+        return nothing
+    end
+
+    @inbounds u1 = A[nsetp, jup]
+
+    @assert abs(u1) > 0
+    up_u1 = up * u1
+    if up_u1 >= 0
+        return nothing
+    end
+
+    @inbounds A[nsetp, jup] = up
+
+    ipstart = nsetp + 1
+    iprem = ipstart + 4 * ((n - ipstart) ÷ 4) - 1
+    @inbounds for ip in ipstart:4:iprem # unroll over columns
+        @nexprs 4 α -> j_α = idx[ip+(α-1)]
+        @nexprs 4 α -> sm_α = zero(T)
+        @simd for l in nsetp:m
+            Al = A[l, jup]
+            @nexprs 4 α -> sm_α = sm_α + A[l, j_α] * Al
+        end
+        @nexprs 4 α -> sm_α /= up_u1
+        @nexprs 4 α -> w_α = zero(T)
+        Al = A[nsetp, jup]
+        @nexprs 4 α -> A[nsetp, j_α] = A[nsetp, j_α] + sm_α * Al
+        @simd for l in nsetp+1:m
+            bl = b[l]
+            Al = A[l, jup]
+            @nexprs 4 α -> A_α = A[l, j_α] + sm_α * Al
+            @nexprs 4 α -> w_α = w_α + A_α * bl
+            @nexprs 4 α -> A[l, j_α] = A_α
+        end
+        @nexprs 4 α -> w[j_α] = w_α
+    end
+
+    if iprem != n # remainder loop
+        @inbounds for ip in iprem+1:n
+            j = idx[ip]
+            sm = zero(T)
+            @simd for l in nsetp:m
+                sm = sm + A[l, j] * A[l, jup]
+            end
+            if sm != 0
+                sm /= up_u1
+                wj = zero(T)
+                A[nsetp, j] = A[nsetp, j] + sm * A[nsetp, jup]
+                @simd for l in nsetp+1:m
+                    Al = A[l, j] + sm * A[l, jup]
+                    wj = wj + Al * b[l]
+                    A[l, j] = Al
+                end
+                w[j] = wj
+            end
+        end
+    end
+
+    @inbounds A[nsetp, jup] = u1
+
+    return nothing
+end
+
+"""
+COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W().
 """
-COMPUTE ORTHOGONAL ROTATION MATRIX..
+@inline function compute_dual!(
+    w::AbstractVector{T},
+    A::AbstractMatrix{T},
+    b::AbstractVector{T},
+    idx::AbstractVector{<:Integer},
+    nsetp::Integer,
+) where {T}
+    m, n = size(A)
+
+    ipstart = nsetp + 1
+    iprem = ipstart + 4 * ((n - ipstart) ÷ 4) - 1
+    @inbounds for ip in ipstart:4:iprem # unroll over columns
+        @nexprs 4 α -> j_α = idx[ip+(α-1)]
+        @nexprs 4 α -> sm_α = zero(T)
+        @simd for l in ipstart:m
+            bl = b[l]
+            @nexprs 4 α -> sm_α = sm_α + A[l, j_α] * bl
+        end
+        @nexprs 4 α -> w[j_α] = sm_α
+    end
+
+    if iprem != n # remainder loop
+        @inbounds for ip in iprem+1:n
+            j = idx[ip]
+            sm = zero(T)
+            @simd for l in ipstart:m
+                sm = sm + A[l, j] * b[l]
+            end
+            w[j] = sm
+        end
+    end
+
+    return w
+end
+
+"""
+COMPUTE ORTHOGONAL ROTATION MATRIX
 The original version of this code was developed by
 Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
 1973 JUN 12, and published in the book
 "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974.
 Revised FEB 1995 to accompany reprinting of the book by SIAM.
 
-    COMPUTE.. MATRIX    (C, S) SO THAT (C, S)(A) = (SQRT(A**2+B**2))
-                        (-S,C)         (-S,C)(B)   (   0          )
+    COMPUTE MATRIX  (C, S) SO THAT (C, S)(A) = (SQRT(A**2+B**2))
+                    (-S,C)         (-S,C)(B)   (   0          )
     COMPUTE SIG = SQRT(A**2+B**2)
         SIG IS COMPUTED LAST TO ALLOW FOR THE POSSIBILITY THAT
         SIG MAY BE IN THE SAME LOCATION AS A OR B .
 """
 @inline function orthogonal_rotmat(a::T, b::T) where {T}
-    if abs(a) > abs(b)
-        xr = b / a
-        yr = sqrt(one(T) + xr * xr)
-        c = sign(a) / yr # note: sign(0) = 0 is consistent
-        s = c * xr
-        sig = abs(a) * yr
-    elseif b != 0
-        xr = a / b
-        yr = sqrt(one(T) + xr * xr)
-        s = sign(b) / yr # note: sign(0) = 0 is consistent
-        c = s * xr
-        sig = abs(b) * yr
-    else
-        sig = zero(T)
-        c = zero(T)
-        s = one(T)
-    end
-    return c, s, sig
+    σ = hypot(a, b)
+    c = a / σ
+    s = b / σ
+    return c, s, σ
 end
 
 @inline function orthogonal_rotmatvec(c::T, s::T, a::T, b::T) where {T}
@@ -399,9 +526,9 @@ Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory
 "SOLVING LEAST SQUARES PROBLEMS", Prentice-HalL, 1974.
 Revised FEB 1995 to accompany reprinting of the book by SIAM.
 
-GIVEN AN M BY N MATRIX, A, AND AN M-VECTOR, B,  COMPUTE AN
+GIVEN AN M BY N MATRIX, A, AND AN M-VECTOR, B, COMPUTE AN
 N-VECTOR, X, THAT SOLVES THE LEAST SQUARES PROBLEM
-A * X = B  SUBJECT TO X .GE. 0
+A * X = B SUBJECT TO X .GE. 0
 """
 function unsafe_nnls!(
     work::NNLSWorkspace{T},
@@ -421,6 +548,9 @@ function unsafe_nnls!(
     work.mode[] = 1
     terminated = false
 
+    # COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W().
+    compute_dual!(w, A, b, idx, nsetp) #TODO: use `mul!(w, A', b)` with `uview`s?
+
     # ******  MAIN LOOP BEGINS HERE  ******
     @inbounds while true
         local i_curr, j_curr, i_maxdual, j_maxdual
@@ -432,30 +562,6 @@ function unsafe_nnls!(
             break
         end
 
-        # COMPUTE COMPONENTS OF THE DUAL (NEGATIVE GRADIENT) VECTOR W().
-        ipstart = nsetp + 1
-        iprem = ipstart + 8 * ((n - ipstart) ÷ 8) - 1
-        @inbounds for ip in ipstart:8:iprem # unroll over columns
-            @nexprs 8 α -> j_α = idx[ip+(α-1)]
-            @nexprs 8 α -> sm_α = zero(T)
-            @simd for l in ipstart:m
-                bl = b[l]
-                @nexprs 8 α -> sm_α = sm_α + A[l, j_α] * bl
-            end
-            @nexprs 8 α -> w[j_α] = sm_α
-        end
-
-        if iprem != n # remainder loop
-            @inbounds for ip in iprem+1:n
-                j = idx[ip]
-                sm = zero(T)
-                @simd for l in ipstart:m
-                    sm = sm + A[l, j] * b[l]
-                end
-                w[j] = sm
-            end
-        end
-
         @inbounds while true
             # FIND LARGEST POSITIVE W(J).
             wmax, i_wmax = largest_positive_dual(w, idx, nsetp)
@@ -477,7 +583,7 @@ function unsafe_nnls!(
             up = construct_householder!(uview(A, nsetp+1:m, j_maxdual), up)
 
             if abs(A[nsetp+1, j_maxdual]) > 0
-                # COL J IS SUFFICIENTLY INDEPENDENT.  COPY B INTO ZZ, UPDATE ZZ
+                # COL J IS SUFFICIENTLY INDEPENDENT. COPY B INTO ZZ, UPDATE ZZ
                 # AND SOLVE FOR ZTEST ( = PROPOSED NEW VALUE FOR X(J) ).
                 copyto!(zz, b)
                 apply_householder!(uview(A, nsetp+1:m, j_maxdual), up, uview(zz, nsetp+1:m))
@@ -499,10 +605,10 @@ function unsafe_nnls!(
             break
         end
 
-        # THE INDEX  J=INDEX(IZ)  HAS BEEN SELECTED TO BE MOVED FROM
-        # SET Z TO SET P.    UPDATE B,  UPDATE INDICES,  APPLY HOUSEHOLDER
-        # TRANSFORMATIONS TO COLS IN NEW SET Z,  ZERO SUBDIAGONAL ELTS IN
-        # COL J,  SET W(J)=0.
+        # THE INDEX J=INDEX(IZ) HAS BEEN SELECTED TO BE MOVED FROM
+        # SET Z TO SET P. UPDATE B, UPDATE INDICES, APPLY HOUSEHOLDER
+        # TRANSFORMATIONS TO COLS IN NEW SET Z, ZERO SUBDIAGONAL ELTS IN
+        # COL J, SET W(J)=0.
         copyto!(b, zz)
 
         idx[i_maxdual] = idx[nsetp+1]
@@ -510,7 +616,7 @@ function unsafe_nnls!(
         nsetp          += 1
 
         if nsetp + 1 <= n
-            apply_householder!(A, up, idx, nsetp, j_maxdual)
+            apply_householder_dual!(A, w, b, up, idx, nsetp, j_maxdual)
         end
 
         if nsetp != m
@@ -528,9 +634,9 @@ function unsafe_nnls!(
             i_curr = idx[1]
         end
 
-        # ******  SECONDARY LOOP BEGINS HERE ******
-        #
-        # ITERATION COUNTER.
+        # ******  SECONDARY LOOP BEGINS HERE  ******
+        dual_flag = false
+
         @inbounds while true
             iter += 1
             if iter > max_iter
@@ -554,10 +660,11 @@ function unsafe_nnls!(
             end
 
             # IF ALL NEW CONSTRAINED COEFFS ARE FEASIBLE THEN ALPHA WILL
-            # STILL = 2.    IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP.
+            # STILL = 2. IF SO EXIT FROM SECONDARY LOOP TO MAIN LOOP.
             if alpha == 2
                 break
             end
+            dual_flag = true
 
             # OTHERWISE USE ALPHA WHICH WILL BE BETWEEN 0 AND 1 TO
             # INTERPOLATE BETWEEN THE OLD X AND THE NEW ZZ.
@@ -598,9 +705,9 @@ function unsafe_nnls!(
                 nsetp -= 1
                 idx[nsetp+1] = j_curr
 
-                # SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE.  THEY SHOULD
+                # SEE IF THE REMAINING COEFFS IN SET P ARE FEASIBLE. THEY SHOULD
                 # BE BECAUSE OF THE WAY ALPHA WAS DETERMINED.
-                # IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR.  ANY
+                # IF ANY ARE INFEASIBLE IT IS DUE TO ROUND-OFF ERROR. ANY
                 # THAT ARE NONPOSITIVE WILL BE SET TO ZERO
                 # AND MOVED FROM SET P TO SET Z.
                 allfeasible = true
@@ -617,7 +724,7 @@ function unsafe_nnls!(
                 end
             end
 
-            # COPY B( ) INTO ZZ( ).  THEN SOLVE AGAIN AND LOOP BACK.
+            # COPY B( ) INTO ZZ( ). THEN SOLVE AGAIN AND LOOP BACK.
             copyto!(zz, b)
             solve_triangular_system!(zz, A, idx, nsetp)
             @inbounds if nsetp > 0
@@ -629,10 +736,14 @@ function unsafe_nnls!(
         end
         # ******  END OF SECONDARY LOOP  ******
 
+        if dual_flag
+            compute_dual!(w, A, b, idx, nsetp)
+        end
+
         @inbounds for ip in 1:nsetp
             x[idx[ip]] = zz[ip]
         end
-        # ALL NEW COEFFS ARE POSITIVE.  LOOP BACK TO BEGINNING.
+        # ALL NEW COEFFS ARE POSITIVE. LOOP BACK TO BEGINNING.
     end
 
     # ******  END OF MAIN LOOP  ******
diff --git a/src/lsqnonneg.jl b/src/lsqnonneg.jl
index c6a6642..bc87b03 100644
--- a/src/lsqnonneg.jl
+++ b/src/lsqnonneg.jl
@@ -738,7 +738,8 @@ function lcurve_corner(f::LCurveCornerCachedFunction{T}, xlow::T = -8.0, xhigh::
 end
 
 function initial_state(f::LCurveCornerCachedFunction{T}, x₁::T, x₄::T) where {T}
-    x₂ = (T(φ) * x₁ + x₄) / (T(φ) + 1)
+    φ = T(Base.MathConstants.φ)
+    x₂ = (φ * x₁ + x₄) / (φ + 1)
     x₃ = x₁ + (x₄ - x₂)
     x⃗ = SA[x₁, x₂, x₃, x₄]
     P⃗ = SA[f(x₁), f(x₂), f(x₃), f(x₄)]
@@ -750,7 +751,8 @@ is_converged(state::LCurveCornerState; xtol, Ptol) = abs(state.x⃗[4] - state.x
 
 function move_left(f::LCurveCornerCachedFunction{T}, state::LCurveCornerState{T}) where {T}
     (; x⃗, P⃗) = state
-    x⃗ = SA[x⃗[1], (T(φ)*x⃗[1]+x⃗[3])/(T(φ)+1), x⃗[2], x⃗[3]]
+    φ = T(Base.MathConstants.φ)
+    x⃗ = SA[x⃗[1], (φ*x⃗[1]+x⃗[3])/(φ+1), x⃗[2], x⃗[3]]
     P⃗ = SA[P⃗[1], f(x⃗[2]), P⃗[2], P⃗[3]] # only P⃗[2] is recalculated
     return LCurveCornerState{T}(x⃗, P⃗)
 end
@@ -830,7 +832,8 @@ end
 
 function menger(xⱼ::T, xₖ::T, xₗ::T, Pⱼ::V, Pₖ::V, Pₗ::V; interp_uniform = true, linear_deriv = true) where {T, V <: SVector{2, T}}
     if interp_uniform
-        h = min(abs(xₖ - xⱼ), abs(xₗ - xₖ)) / T(φ)
+        φ = T(Base.MathConstants.φ)
+        h = min(abs(xₖ - xⱼ), abs(xₗ - xₖ)) / φ
         h₋ = h₊ = h
         x₋, x₀, x₊ = xₖ - h, xₖ, xₖ + h
         P₀ = Pₖ
@@ -991,6 +994,7 @@ end
 
 @inline solution(work::NNLSGCVRegProblem) = solution(work.nnls_prob_smooth_cache[])
 @inline ncomponents(work::NNLSGCVRegProblem) = ncomponents(work.nnls_prob_smooth_cache[])
+@inline LinearAlgebra.svdvals!(work::NNLSGCVRegProblem, A = work.A) = svdvals!(work.svd_work, A)
 
 @doc raw"""
     lsqnonneg_gcv(A::AbstractMatrix, b::AbstractVector)
@@ -1043,7 +1047,7 @@ function lsqnonneg_gcv!(work::NNLSGCVRegProblem{T}; method = :brent, init = -4.0
     logμ₀ = T(init)
 
     # Precompute singular values for GCV computation
-    svdvals!(work.svd_work, work.A)
+    svdvals!(work)
 
     # Non-zero lower bound for GCV to avoid log(0) in the objective function
     gcv_low = gcv_lower_bound(work)
@@ -1051,7 +1055,9 @@ function lsqnonneg_gcv!(work::NNLSGCVRegProblem{T}; method = :brent, init = -4.0
     # Objective functions
     reset_cache!(work.nnls_prob_smooth_cache)
     function log𝒢(logμ)
-        return log(max(gcv!(work, logμ), gcv_low))
+        𝒢 = gcv!(work, logμ)
+        𝒢 = max(𝒢, gcv_low)
+        return log(𝒢)
     end
     function log𝒢_and_∇log𝒢(logμ)
         𝒢, ∇𝒢 = gcv_and_∇gcv!(work, logμ)
@@ -1113,6 +1119,7 @@ end
 # where here L = Id and λ = μ.
 function gcv!(work::NNLSGCVRegProblem, logμ)
     # Unpack buffers
+    #   NOTE: assumes `svdvals!(work)` has been called and that the singular values `work.γ` are ready
     (; m, n, γ) = work
 
     # Solve regularized NNLS problem
@@ -1130,6 +1137,7 @@ end
 
 function gcv_and_∇gcv!(work::NNLSGCVRegProblem, logμ)
     # Unpack buffers
+    #   NOTE: assumes `svdvals!(work)` has been called and that the singular values `work.γ` are ready
     (; m, n, γ) = work
 
     # Solve regularized NNLS problem
diff --git a/test/nnls.jl b/test/nnls.jl
index 0236c60..173c4f0 100644
--- a/test/nnls.jl
+++ b/test/nnls.jl
@@ -45,7 +45,7 @@ function verify_NNLS(m, n)
         # Solution
         @test all(>(0), x₊)
         @test all(==(0), x₀)
-        @test x₊ ≈ (A₊'A₊) \ (A₊'b)
+        @test x₊ ≈ (A₊' * A₊) \ (A₊' * b)
         @test x₊ ≈ A₊ \ b
 
         # Dual (i.e. gradient)
@@ -95,8 +95,8 @@ function verify_NNLS(m, n)
         end
 
         # Cholesky factors
-        @test A₊'A₊ ≈ U'U
-        @test A₊'A₊ ≈ L * L'
+        @test A₊' * A₊ ≈ U' * U
+        @test A₊' * A₊ ≈ L * L'
 
         F = cholesky!(NNLS.NormalEquation(), work)
         if n₊ > 0
@@ -106,9 +106,33 @@ function verify_NNLS(m, n)
             @test b′ ≈ b′′
             @test !(x′ ≈ x′′)
             @test x′ == F \ b′
-            @test x′ ≈ (A₊'A₊) \ b′
+            @test x′ ≈ (A₊' * A₊) \ b′
             @test @allocated(ldiv!(x′, cholesky!(NNLS.NormalEquation(), work), b′)) == 0
         end
+
+        # QR factors
+        #   Note: qr(A) = QR relates to cholesky(A'A) = LL' = U'U via:
+        #   A'A = U'U = (QR)'(QR) = R'R => R = U (up to row sign)
+        posdiag(R) = UpperTriangular(Diagonal(sign.(diag(R))) * R) # ensure diagonal is positive
+        R = qr(A₊).R
+        @test posdiag(R) ≈ posdiag(U)
+
+        # Test that solve is independent of initial x, idx, nsetp
+        work′ = copy(work)
+        work′.x .= rand(n)
+        work′.idx .= Random.randperm(n)
+        work′.nsetp[] = rand(0:min(m, n))
+        NNLS.nnls!(work′, A, b)
+
+        @test work′.A ≈ work.A
+        @test work′.b ≈ work.b
+        @test work′.x ≈ work.x
+        @test work′.w ≈ work.w
+        @test work′.zz ≈ work.zz
+        @test work′.idx == work.idx
+        @test work′.rnorm[] ≈ work.rnorm[]
+        @test work′.mode[] == work.mode[]
+        @test work′.nsetp[] == work.nsetp[]
     end # GC.@preserve
 
     return work
@@ -219,7 +243,7 @@ function test_lsqnonneg_gcv(m, n)
     @test work.γ == svdvals(A) # should match exactly (same underlying LAPACK call)
 
     # Test GCV degrees of freedom
-    @test DECAES.gcv_dof(A, μ) ≈ tr(I - A * ((A'A + μ^2 * I) \ A')) # "degrees of freedom" of normal equation matrix
+    @test DECAES.gcv_dof(A, μ) ≈ tr(I - A * ((A' * A + μ^2 * I) \ A')) # "degrees of freedom" of normal equation matrix
     @test DECAES.∇gcv_dof(A, μ) ≈ ∇logfinitediff(_logμ -> DECAES.gcv_dof(A, exp(_logμ)), logμ, 1e-6) atol = 1e-4 rtol = 1e-4
 
     # Test GCV loss function
@@ -311,7 +335,7 @@ d/dμ ||x(μ)||^2:
     for (m, n) in NNLS_SIZES
         A, b = rand_NNLS_data(m, n)
         A_μ = μ -> [A; μ * LinearAlgebra.I]
-        B_μ = μ -> LinearAlgebra.cholesky!(LinearAlgebra.Symmetric(A'A + μ^2 * I))
+        B_μ = μ -> LinearAlgebra.cholesky!(LinearAlgebra.Symmetric(A' * A + μ^2 * I))
         x_μ = μ -> A_μ(μ) \ [b; zeros(n)]
 
         μ = 0.99
@@ -321,26 +345,26 @@ d/dμ ||x(μ)||^2:
         # Derivative of solution x w.r.t. μ
         f = _μ -> x_μ(_μ)
         dx_dμ = -2 * μ * (B \ x)
-        @test dx_dμ ≈ -2 * μ * (B \ (B \ (A'b)))
+        @test dx_dμ ≈ -2 * μ * (B \ (B \ (A' * b)))
         @test dx_dμ ≈ ∇logfinitediff(f ∘ exp, log(μ), 1e-6) rtol = 1e-4
 
         # Derivative of A*x (or equivalently, A*x-b) w.r.t. μ
         f = _μ -> A * x_μ(_μ)
         dAx_dμ = A * dx_dμ
-        @test dAx_dμ ≈ -2 * μ * (A * (B \ (B \ (A'b))))
+        @test dAx_dμ ≈ -2 * μ * (A * (B \ (B \ (A' * b))))
         @test dAx_dμ ≈ ∇logfinitediff(f ∘ exp, log(μ), 1e-6) rtol = 1e-4
 
         # Derivative of solution l2-norm ||x||^2 w.r.t. μ
         f = _μ -> sum(abs2, x_μ(_μ))
-        dx²_dμ = 2 * x'dx_dμ
-        @test dx²_dμ ≈ (-4 * μ) * b' * (A * (B \ (B \ (B \ (A'b)))))
+        dx²_dμ = 2 * dot(x, dx_dμ)
+        @test dx²_dμ ≈ -4 * μ * dot(b, (A * (B \ (B \ (B \ (A' * b))))))
         @test dx²_dμ ≈ ∇logfinitediff(f ∘ exp, log(μ), 1e-6) rtol = 1e-4
 
         # Derivative of residual l2-norm ||A*x-b||^2 w.r.t. μ
         f = _μ -> sum(abs2, A * x_μ(_μ) - b)
-        dAxb_dμ = -2μ^2 * x'dx_dμ
-        @test dAxb_dμ ≈ 2 * (A * x - b)'dAx_dμ
-        @test dAxb_dμ ≈ -4 * μ * ((A * x - b)' * (A * (B \ (B \ (A'b)))))
+        dAxb_dμ = -2μ^2 * dot(x, dx_dμ)
+        @test dAxb_dμ ≈ 2 * dot(A * x - b, dAx_dμ)
+        @test dAxb_dμ ≈ -4 * μ * dot(A * x - b, A * (B \ (B \ (A' * b))))
         @test dAxb_dμ ≈ ∇logfinitediff(f ∘ exp, log(μ), 1e-6) rtol = 1e-4
     end
 end