
Adaptive Gradient Descent Method for e.g., BCG #549

Merged · 64 commits · Mar 4, 2025

Commits (changes from all commits)
433a3ff
adaptive gradient descent method for various things
pokutta Feb 17, 2025
7558164
added to runtests.jl as well
pokutta Feb 17, 2025
9ad4a78
minor / forgotten update
pokutta Feb 17, 2025
0e9aaca
added second example
pokutta Feb 18, 2025
2841507
added proximal version including tests
pokutta Feb 18, 2025
54ecf34
cleaned up files
pokutta Feb 18, 2025
b4f07ca
minor
pokutta Feb 18, 2025
e0e3f58
Tighten threshold for lazy FW vertex. (#550)
dhendryc Feb 18, 2025
5e37019
dummy commit to force CI
pokutta Feb 18, 2025
0039e45
added more projections and tests for those
pokutta Feb 18, 2025
9fb11b2
check code
pokutta Feb 19, 2025
0e3cc78
updated example!
pokutta Feb 19, 2025
f91f635
generalized with all prox from ProximalOperators
matbesancon Feb 19, 2025
b85379a
specific methods
matbesancon Feb 19, 2025
dfa08fb
replace nameof
matbesancon Feb 19, 2025
18813b7
adapt one test
matbesancon Feb 19, 2025
8f82386
adapt one test
matbesancon Feb 19, 2025
5cfff5f
merge master
matbesancon Feb 20, 2025
0729c3d
addapt API
matbesancon Feb 21, 2025
d3e079e
typo
matbesancon Feb 21, 2025
60ed116
PO dep in test
matbesancon Feb 21, 2025
b225a45
correct double packagwe
matbesancon Feb 21, 2025
3188df9
naming
matbesancon Feb 21, 2025
47b8ac0
rename prox kw
matbesancon Feb 21, 2025
b9386ed
rename prox kw
matbesancon Feb 21, 2025
159cef8
max iteration keyword
matbesancon Feb 21, 2025
3327cf1
format
matbesancon Feb 21, 2025
c58f2bd
format examples
matbesancon Feb 21, 2025
6dcb5b3
Merge branch 'master' of github.com:ZIB-IOL/FrankWolfe.jl into AdaGD
matbesancon Feb 21, 2025
ca889ad
fixed the test issues
pokutta Feb 22, 2025
ed951e0
fix tests
matbesancon Feb 22, 2025
3b17f0b
simplify to linf ball
matbesancon Feb 22, 2025
40f36f0
add docs page
matbesancon Feb 23, 2025
ccfd157
remove excessive comment
matbesancon Feb 23, 2025
116067b
fix type
matbesancon Feb 23, 2025
55d627b
relax test and more idiomatic gradient call
matbesancon Feb 23, 2025
9586a74
stable RNG
matbesancon Feb 24, 2025
c4d93ac
loosen tol slightly
matbesancon Feb 24, 2025
05372bc
loosen tol slightly
matbesancon Feb 24, 2025
f8bb6db
loosen gradient, remove verbose
matbesancon Feb 24, 2025
9802981
format
matbesancon Feb 24, 2025
9666d07
loosen iter count
matbesancon Feb 25, 2025
423be27
fix conflict
matbesancon Feb 25, 2025
9cf53fb
tols
matbesancon Feb 25, 2025
97be149
fix seed call
matbesancon Feb 25, 2025
1c4e70e
add tolerance on inequalities
matbesancon Feb 25, 2025
7fa25eb
loosen bound SDP
matbesancon Feb 25, 2025
04e10f9
restrict for exact lmo, minor adjustments
matbesancon Feb 25, 2025
8378b2e
rename functions in gradient file to avoid overwritting
matbesancon Feb 25, 2025
54ca33c
prints for action test
matbesancon Feb 25, 2025
9a0b837
typo
matbesancon Feb 26, 2025
65c1571
fix dicg max step in unit simplex
matbesancon Feb 26, 2025
66c617a
prints for action test
matbesancon Feb 26, 2025
9136b92
loosen tol
matbesancon Feb 26, 2025
955250f
adapt to RNG
matbesancon Feb 26, 2025
0fc0635
adapt to RNG
matbesancon Feb 26, 2025
c10af73
Read and clean
sebastiendesignolle Mar 1, 2025
d6c9335
Make @testset string uniform to length 64
sebastiendesignolle Mar 1, 2025
70433ea
Avoid isnothing for consistency
sebastiendesignolle Mar 1, 2025
8d0cb02
tolerance
matbesancon Mar 4, 2025
5ab4f75
Revert "Make @testset string uniform to length 64"
matbesancon Mar 4, 2025
04637d0
cleanup spacing in test
matbesancon Mar 4, 2025
1c009ca
Merge branch 'master' of github.com:ZIB-IOL/FrankWolfe.jl into AdaGD
matbesancon Mar 4, 2025
6e3145c
action version
matbesancon Mar 4, 2025
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: actions/cache@v1
- uses: actions/cache@v4
env:
cache-name: cache-artifacts
with:
2 changes: 2 additions & 0 deletions .gitignore
@@ -28,3 +28,5 @@ cc/*
docs/src/contributing.md
docs/src/index.md
.DS_Store
examples/heavy_ball_tests/data/*
src/heavyball_dirty.jl
15 changes: 13 additions & 2 deletions Project.toml
@@ -11,11 +11,18 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
ProximalCore = "dc4f5ac2-75d1-4f31-931e-60435d74994b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"

[weakdeps]
ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"

[extensions]
FrankWolfeProxExt = "ProximalOperators"

[compat]
Arpack = "0.5"
DoubleFloats = "1.1"
@@ -27,7 +34,10 @@ MultivariatePolynomials = "0.5"
PlotThemes = "3"
Plots = "1.10"
ProgressMeter = "1.4"
ProximalCore = "0.1"
ProximalOperators = "0.16"
Setfield = "1"
StableRNGs = "1"
TimerOutputs = "0.5"
ZipFile = "0.9.4"
julia = "1"
@@ -53,11 +63,12 @@ MultivariatePolynomials = "102ac46a-7ee4-5c85-9060-abc95bfdeaa3"
PlotThemes = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Polyhedra = "67491407-f73d-577b-9b50-8179a7c68029"
ProximalOperators = "a725b495-10eb-56fe-b38b-717eba820537"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"

[targets]
test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia", "StableRNGs"]
test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ProximalOperators", "ReverseDiff", "StableRNGs", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"]
9 changes: 9 additions & 0 deletions docs/src/reference/5_gradient_descent.md
@@ -0,0 +1,9 @@
# Adaptive Proximal Gradient Descent Methods

This package implements several variants of adaptive proximal gradient descent methods.
Their primary use is internal to FrankWolfe.jl, specifically for the Blended Conditional Gradients algorithm, but they can also be used as standalone algorithms.

```@autodocs
Modules = [FrankWolfe]
Pages = ["gradient_descent.jl"]
```
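
As background for the new docs page: these methods combine a gradient step on the smooth part `f` with a proximal step on a (possibly nonsmooth) part `g`, choosing the step size adaptively from observed gradients rather than from a known Lipschitz constant. A sketch of the generic update and of one well-known adaptive rule in the style of Malitsky and Mishchenko, shown for orientation only and not necessarily the exact rule implemented in this PR:

```math
x_{k+1} = \operatorname{prox}_{\lambda_k g}\!\left(x_k - \lambda_k \nabla f(x_k)\right),
\qquad
\lambda_k = \min\left\{ \sqrt{1+\theta_{k-1}}\,\lambda_{k-1},\;
\frac{\lVert x_k - x_{k-1}\rVert}{2\,\lVert \nabla f(x_k) - \nabla f(x_{k-1})\rVert} \right\},
\quad \theta_k = \frac{\lambda_k}{\lambda_{k-1}}.
```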
83 changes: 83 additions & 0 deletions examples/ada_gradient.jl
@@ -0,0 +1,83 @@
using FrankWolfe
using LinearAlgebra
using Random

max_iter = Int(1e5)
print_iter = max_iter // 100
epsilon = 1e-10

"""
Simple quadratic function f(x) = 1/2 * x'Qx + b'x
"""
function quadratic_oracle(x, Q, b)
return 0.5 * dot(x, Q, x) + dot(b, x)
end

"""
Gradient of quadratic function ∇f(x) = Qx + b
"""
function quadratic_gradient!(storage, x, Q, b)
mul!(storage, Q, x)
storage .+= b
return storage
end

# Set random seed for reproducibility.
Random.seed!(42)

# Problem dimension
n = 10000

# Generate positive definite Q matrix and random b vector
Q = rand(n, n)
Q = Q' * Q + I # Make Q positive definite
b = rand(n)

# Create objective function and gradient
f(x) = quadratic_oracle(x, Q, b)
grad!(storage, x) = quadratic_gradient!(storage, x, Q, b)

# Initial point
x0 = 10 * rand(n)

println("Testing Adaptive Gradient Descent (variant 1)")
println("============================================")

x1, f1, hist1 = FrankWolfe.adaptive_gradient_descent(
f,
grad!,
x0;
step0=0.1,
max_iteration=max_iter,
print_iter=print_iter,
epsilon=epsilon,
verbose=true,
)

println("\nFinal objective value: $(f1)")
println("Final gradient norm: $(norm(grad!(similar(x0), x1)))")

println("\nTesting Adaptive Gradient Descent (variant 2)")
println("============================================")

x2, f2, hist2 = FrankWolfe.adaptive_gradient_descent2(
f,
grad!,
x0;
step0=0.1,
max_iteration=max_iter,
print_iter=print_iter,
epsilon=epsilon,
verbose=true,
)

println("\nFinal objective value: $(f2)")
println("Final gradient norm: $(norm(grad!(similar(x0), x2)))")

# Compare the two methods
println("\nComparison")
println("==========")
println("Method 1 final objective: $(f1)")
println("Method 2 final objective: $(f2)")
println("Objective difference: $(abs(f1 - f2))")
println("Solution difference norm: $(norm(x1 - x2))")
95 changes: 95 additions & 0 deletions examples/ada_gradient_conditioned.jl
@@ -0,0 +1,95 @@
using FrankWolfe
using LinearAlgebra
using Random

max_iter = Int(1e5)
print_iter = max_iter // 20
epsilon = 1e-10

n = 1000
s = 42
Random.seed!(s)

# Create test problem with controlled condition number
const condition_number = 10000.0 # Much better than random conditioning
const matrix = begin
# Create orthogonal matrix
Q = qr(randn(n, n)).Q
# Create diagonal matrix with controlled condition number
λ_max = 1.0
λ_min = λ_max / condition_number
Λ = Diagonal(range(λ_min, λ_max, length=n))
# Final matrix with controlled conditioning
Q * sqrt(Λ)
end
const hessian = transpose(matrix) * matrix
const linear = rand(n)

f(x) = dot(linear, x) + 0.5 * transpose(x) * hessian * x

function grad!(storage, x)
return storage .= linear + hessian * x
end

const L = eigmax(hessian)

# Compute optimal solution using direct solve for testing
const x_opt = -hessian \ linear
const f_opt = f(x_opt)

println("\nTesting adaptive gradient descent algorithms...\n")
println("Test instance statistics:")
println("------------------------")
println("Dimension n: $n")
println("Lipschitz constant L: $L")
println("Optimal objective value f*: $f_opt")
println("Optimal solution norm: $(norm(x_opt))")
println("Problem condition number: $(eigmax(hessian)/eigmin(hessian))")
println()

########## SOLVING

# Initial point
x0 = 10 * rand(n)

println("Testing Adaptive Gradient Descent (variant 1)")
println("============================================")

x1, f1, hist1 = FrankWolfe.adaptive_gradient_descent(
f,
grad!,
x0;
step0=0.1,
max_iteration=max_iter,
print_iter=print_iter,
epsilon=epsilon,
verbose=true,
)

println("\nFinal objective value: $(f1)")
println("Final gradient norm: $(norm(grad!(similar(x0), x1)))")

println("\nTesting Adaptive Gradient Descent (variant 2)")
println("============================================")

x2, f2, hist2 = FrankWolfe.adaptive_gradient_descent2(
f,
grad!,
x0;
step0=0.1,
max_iteration=max_iter,
print_iter=print_iter,
epsilon=epsilon,
verbose=true,
)

println("\nFinal objective value: $(f2)")
println("Final gradient norm: $(norm(grad!(similar(x0), x2)))")

# Compare the two methods
println("\nComparison")
println("==========")
println("Method 1 final objective: $(f1)")
println("Method 2 final objective: $(f2)")
println("Objective difference: $(abs(f1 - f2))")
println("Solution difference norm: $(norm(x1 - x2))")
6 changes: 6 additions & 0 deletions ext/FrankWolfeProxExt.jl
@@ -0,0 +1,6 @@
module FrankWolfeProxExt

using FrankWolfe
using ProximalOperators

end
3 changes: 3 additions & 0 deletions src/FrankWolfe.jl
@@ -9,6 +9,7 @@ using SparseArrays: spzeros, SparseVector
import SparseArrays
import Random
using Setfield: @set
import ProximalCore

import MathOptInterface
const MOI = MathOptInterface
@@ -49,6 +50,8 @@ include("dicg.jl")
include("tracking.jl")
include("callback.jl")

include("gradient_descent.jl")

# collecting most common data types etc and precompile
# min version req set to 1.5 to prevent stalling of julia 1
@static if VERSION >= v"1.5"
2 changes: 1 addition & 1 deletion src/afw.jl
@@ -204,7 +204,7 @@ function away_frank_wolfe(

# compute current iterate from active set
x = get_active_set_iterate(active_set)
if isnothing(momentum)
if momentum === nothing
grad!(gradient, x)
else
grad!(gtemp, x)
2 changes: 1 addition & 1 deletion src/blended_cg.jl
@@ -493,7 +493,7 @@ function minimize_over_convex_hull!(
tolerance,
)
#Early exit if we have detected that the strong-Wolfe gap is below the desired tolerance while building the reduced problem.
if isnothing(M)
if M === nothing
return 0
end
T = eltype(M)