Adjusted inverse Hessian boosting (IHB), added more Frank-Wolfe (FW) variant options

Dominik Kuzinowicz · Dominik Kuzinowicz · commit a410cdaff4e9 · 2024-06-26T11:43:45.000+02:00
diff --git a/src/oracle_avi.jl b/src/oracle_avi.jl
@@ -82,7 +82,7 @@ function fit_oavi(
             term_evaluated_squared = norm(term_evaluated, 2)^2
             
             # built-in Frank-Wolfe oracle
-            if oracle in ["CG", "BCG", "BPCG"]
+            if oracle in ["CG", "Away", "AFW", "PCG", "Lazy", "LCG", "BCG", "BPCG"]
                 coefficient_vector, loss = conditional_gradients(   oracle, 
                                                                     data, term_evaluated, 
                                                                     lambda, 
diff --git a/src/oracle_constructors.jl b/src/oracle_constructors.jl
@@ -42,38 +42,47 @@ function conditional_gradients(
     
     # determine oracle
     if oracle_type == "CG"
-        oracle = frank_wolfe
+        oracle = FrankWolfe.frank_wolfe
+    elseif oracle_type == "Away" || oracle_type == "AFW"
+        oracle = FrankWolfe.away_frank_wolfe
+    elseif oracle_type == "PCG"
+        oracle = FrankWolfe.pairwise_frank_wolfe
+    elseif oracle_type == "Lazy" || oracle_type == "LCG"
+        oracle = FrankWolfe.lazified_conditional_gradient
     elseif oracle_type == "BCG"
-        oracle = blended_conditional_gradient
+        oracle = FrankWolfe.blended_conditional_gradient
     elseif oracle_type == "BPCG"
         oracle = FrankWolfe.blended_pairwise_conditional_gradient
     end
     
     # create L1 ball as feasible region
     region = FrankWolfe.LpNormLMO{1}(tau-1)
-    
-    # compute starting point
+
+
+    # call oracles
     if inverse_hessian_boost in ["weak", "full"]
+        display("Inverse Hessian Boosting (IHB) is active. Vanilla Frank-Wolfe is used for the IHB run.")
+
+        # compute starting point for IHB
         x0 = l1_projection(solution; radius=tau-1)
         x0 = reshape(x0, length(x0))
-    else
-        x0 = compute_extreme_point(region, zeros(Float64, n))
-        x0 = Vector(x0)
-    end
-    
-    # run oracle to find coefficient vector
-    if inverse_hessian_boost == "weak"
-        coefficient_vector, _ = oracle(f, grad!, region, x0; epsilon=epsilon, max_iteration=max_iters)
+
+        # IHB oracle call
+        coefficient_vector, _ = FrankWolfe.frank_wolfe(f, grad!, region, x0; epsilon=epsilon, max_iteration=max_iters)
         if typeof(coefficient_vector) <: FrankWolfe.ScaledHotVector
             coefficient_vector = convert(Vector, coefficient_vector)
         end
         coefficient_vector = vcat(coefficient_vector, [1])
     
         loss = 1/m * norm(data_with_labels * coefficient_vector, 2)^2
-        
-        if loss <= psi
+
+        # attempt to find sparse solution if IHB solution found
+        if inverse_hessian_boost == "weak" && loss <= psi 
+            display("IHB solution found. Attempting to find sparse solution.")
+
             x0 = compute_extreme_point(region, zeros(Float64, n))
             x0 = Vector(x0)
+
             tmp_coefficient_vector, _ = oracle(f, grad!, region, x0; epsilon=epsilon, max_iteration=max_iters)
             tmp_coefficient_vector = vcat(tmp_coefficient_vector, [1])
             
@@ -83,14 +92,18 @@ function conditional_gradients(
                 loss = loss2
                 coefficient_vector = tmp_coefficient_vector
             end
-                   
         end
-    else    
+    else
+        # compute starting vertex
+        x0 = compute_extreme_point(region, zeros(Float64, n))
+        x0 = Vector(x0)
+
+        # oracle call
         coefficient_vector, _ = oracle(f, grad!, region, x0; epsilon=epsilon, max_iteration=max_iters)
         coefficient_vector = vcat(coefficient_vector, [1])
        
         loss = 1/m * norm(data_with_labels * coefficient_vector, 2)^2
-    end        
+    end      
     return coefficient_vector, loss
 end
 
diff --git a/test/test_auxiliary_functions.jl b/test/test_auxiliary_functions.jl
@@ -1,6 +1,8 @@
 using Test
+using LinearAlgebra
+using ApproximateVanishingIdeals
+const AVI = ApproximateVanishingIdeals
 
-include("../src/auxiliary_functions.jl")
 
 matrix = Matrix([[1 2 3 4 3]; 
                  [0 1 2 3 2]; 
@@ -16,26 +18,26 @@ matrix_unique = Matrix([[1 3 2 4];
 
 
 @testset "Test suite for deg_lex_sort" begin
-  matrix_sorted_1, matrix_sorted_2, _ = deg_lex_sort(matrix, 1. * matrix)
+  matrix_sorted_1, matrix_sorted_2, _ = AVI.deg_lex_sort(matrix, 1. * matrix)
   @test matrix_sorted_1 == matrix_sorted
   @test matrix_sorted_2 == matrix_sorted
 end
 
 
 @testset "Test suite for get_unique_columns" begin 
-  mat1_unique, mat2_unique, unique_inds = get_unique_columns(matrix, matrix)
+  mat1_unique, mat2_unique, unique_inds = AVI.get_unique_columns(matrix, matrix)
   @test mat1_unique == matrix_unique
   @test mat2_unique == matrix_unique
   @test unique_inds == [1, 2, 4, 5]
   
-  mat1_unique, mat2_unique, _ = get_unique_columns(matrix)
+  mat1_unique, mat2_unique, _ = AVI.get_unique_columns(matrix)
   @test mat1_unique == matrix_unique
   @test mat2_unique == zeros(Float64, 0, 0)
 end
 
 
 @testset "Test suite for compute_degree" begin
-  @test compute_degree(matrix) == [2 6 6 9 6]
+  @test AVI.compute_degree(matrix) == [2 6 6 9 6]
 end
 
 
@@ -45,9 +47,9 @@ matrix_non_zero = Matrix([[1 0 3 0 0 0];
                           [1 0 0 0 3 0]])
 
 @testset "Test suite for finding non-zero entries" begin
-  first_ids = find_first_non_zero_entries(matrix_non_zero)
+  first_ids = AVI.find_first_non_zero_entries(matrix_non_zero)
   @test first_ids == [1, 2, 1, 3, 4, 1]
 
-  last_ids = find_last_non_zero_entries(matrix_non_zero)
+  last_ids = AVI.find_last_non_zero_entries(matrix_non_zero)
   @test last_ids == [4, 3, 3, 3, 4, 4]
 end
diff --git a/test/test_auxiliary_functions_avi.jl b/test/test_auxiliary_functions_avi.jl
@@ -1,28 +1,24 @@
 using Test
 using FrankWolfe
+using ApproximateVanishingIdeals
+using LinearAlgebra
+const AVI = ApproximateVanishingIdeals
 
-include("../src/auxiliary_functions.jl")
-include("../src/terms_and_polynomials.jl")
-include("../src/oracle_constructors.jl") 
-include("../src/border_construction.jl")
-include("../src/objective_functions.jl")
-include("../src/auxiliary_functions_avi.jl")
-include("../src/oracle_avi.jl")
 
 @testset "Test suite for update_coefficient_vectors" begin
   G_coefficient_vectors = reshape([1, 2, 0], 3, 1)
     
   vec1 = reshape([1, 2], 2, 1)
     
-  G_coefficient_vectors = update_coefficient_vectors(G_coefficient_vectors, vec1)
+  G_coefficient_vectors = AVI.update_coefficient_vectors(G_coefficient_vectors, vec1)
   G_coefficient_vectors = vcat(G_coefficient_vectors, zeros(1, size(G_coefficient_vectors, 2)))
-  G_coefficient_vectors = update_coefficient_vectors(G_coefficient_vectors, vec1)
+  G_coefficient_vectors = AVI.update_coefficient_vectors(G_coefficient_vectors, vec1)
   G_coefficient_vectors = vcat(G_coefficient_vectors, zeros(1, size(G_coefficient_vectors, 2)))
   G_coefficient_vectors = vcat(G_coefficient_vectors, zeros(1, size(G_coefficient_vectors, 2)))
     
   vec1 = reshape([1, 2, 3], 3, 1)
     
-  G_coefficient_vectors = update_coefficient_vectors(G_coefficient_vectors, vec1)
+  G_coefficient_vectors = AVI.update_coefficient_vectors(G_coefficient_vectors, vec1)
     
   @test G_coefficient_vectors == Matrix([[1. 1. 1. 1.];
                                          [2. 0. 0. 0.];
@@ -37,14 +33,14 @@ end
     vec1 = rand(1:10, 20)
     radius_1 = 2.5
     radius_2 = 3.0
-    @test norm(l1_projection(vec1), 1) ≈ 1
-    @test norm(l1_projection(vec1; radius=radius_1), 1) ≈ radius_1
-    @test norm(l1_projection(vec1; radius=radius_2), 1) ≈ radius_2
+    @test norm(AVI.l1_projection(vec1), 1) ≈ 1
+    @test norm(AVI.l1_projection(vec1; radius=radius_1), 1) ≈ radius_1
+    @test norm(AVI.l1_projection(vec1; radius=radius_2), 1) ≈ radius_2
 
-    vec2 = l1_projection(vec1)
-    @test vec2 ≈ l1_projection(vec2)
-    @test norm(l1_projection(vec2), 1) ≈ 1
-    @test norm(vec2) ≈ norm(l1_projection(vec2))
+    vec2 = AVI.l1_projection(vec1)
+    @test vec2 ≈ AVI.l1_projection(vec2)
+    @test norm(AVI.l1_projection(vec2), 1) ≈ 1
+    @test norm(vec2) ≈ norm(AVI.l1_projection(vec2))
 end;
 
 
@@ -60,7 +56,7 @@ end;
 
   A_a = transpose(A) * a 
 
-  B, B_2, B_2_1 = streaming_matrix_updates(A, A_sq, A_a, a, a_sq; A_squared_inv=A_sq_inv)
+  B, B_2, B_2_1 = AVI.streaming_matrix_updates(A, A_sq, A_a, a, a_sq; A_squared_inv=A_sq_inv)
 
   C = hcat(A, a)
 
diff --git a/test/test_border_construction.jl b/test/test_border_construction.jl
@@ -1,7 +1,6 @@
-include("../src/border_construction.jl")
-include("../src/auxiliary_functions.jl")
-
 using LinearAlgebra
+using ApproximateVanishingIdeals
+const AVI = ApproximateVanishingIdeals
 using Test
 
 
@@ -14,7 +13,7 @@ using Test
                      [0 1];
                      [0 1];])
     
-  matrix_2_purged, matrix_2_purged_2, _ = purge(matrix_2, 1. * matrix_2, matrix_1)
+  matrix_2_purged, matrix_2_purged_2, _ = AVI.purge(matrix_2, 1. * matrix_2, matrix_1)
     
   @test size(matrix_2_purged, 2) == 0
   @test size(matrix_2_purged_2, 2) == 0
@@ -27,7 +26,7 @@ using Test
                      [0 2];
                      [2 1];])
     
-  matrix_2_purged, matrix_2_purged_2, _ = purge(matrix_2, 1. * matrix_2, matrix_1)
+  matrix_2_purged, matrix_2_purged_2, _ = AVI.purge(matrix_2, 1. * matrix_2, matrix_1)
     
   @test matrix_2_purged == matrix_2_purged_2
   @test matrix_2_purged == Matrix([[1 2 3];
@@ -52,7 +51,7 @@ end
   # duplicate indices: 3, 6, 8, 12; purged indices: 7, 8, 11, 12, 13, 15
   unique_non_purging_indices = [1, 2, 4, 5, 9, 10, 14]
 
-  terms_raw, _, non_purging_indices, _ = construct_border(terms, 1. * terms, zeros(Float64, 0, 0), degree_1_terms, 1. * degree_1_terms, purging_terms)
+  terms_raw, _, non_purging_indices, _ = AVI.construct_border(terms, 1. * terms, zeros(Float64, 0, 0), degree_1_terms, 1. * degree_1_terms, purging_terms)
 
   @test terms_raw[:, non_purging_indices] == raw_border[:, unique_non_purging_indices]
 end
diff --git a/test/test_objective_functions.jl b/test/test_objective_functions.jl
@@ -1,31 +1,32 @@
 using Test
 using LinearAlgebra
+using ApproximateVanishingIdeals
+const AVI = ApproximateVanishingIdeals
 
-include("../src/objective_functions.jl")
 
-@testset "Test suite for 'evaluate_function' in L2Loss" begin
+@testset "Test suite for evaluate_function in L2Loss" begin
   for m in 1:5
     for n in 1:10
       A = rand(m, n)
       b = rand(m)
       x = rand(n)
       lambda = rand() * n
-      _, evaluate_function, _ = L2Loss(A, b, lambda, A' * A, A' * b, b' * b)
+      _, evaluate_function, _ = AVI.L2Loss(A, b, lambda, A' * A, A' * b, b' * b)
             
       @test 1/m * norm(A * x + b, 2)^2 + lambda * norm(x, 2)^2 / 2 ≈ evaluate_function(x)
     end
   end
 end;
 
 
-@testset "Test suite for 'evaluate_gradient!' in L2Loss" begin
+@testset "Test suite for evaluate_gradient! in L2Loss" begin
   for m in 1:5
     for n in 1:10
       A = rand(m, n)
       b = rand(m)
       x = rand(n)
       lambda = rand() * n
-      _, _, evaluate_gradient! = L2Loss(A, b, lambda, A' * A, A' * b, b' * b)
+      _, _, evaluate_gradient! = AVI.L2Loss(A, b, lambda, A' * A, A' * b, b' * b)
             
       gradient = 2/m * (A' * A * x + A' * b + m/2 * lambda * x)
       approx_vec = (gradient .≈ evaluate_gradient!(zeros(n), x))
diff --git a/test/test_oracle_avi.jl b/test/test_oracle_avi.jl
@@ -2,21 +2,16 @@ using Test
 using Random
 using LinearAlgebra
 using FrankWolfe
+using ApproximateVanishingIdeals
+const AVI = ApproximateVanishingIdeals
 
-include("../src/oracle_avi.jl")
-include("../src/auxiliary_functions.jl")
-include("../src/terms_and_polynomials.jl")
-include("../src/objective_functions.jl")
-include("../src/auxiliary_functions_avi.jl")
-include("../src/oracle_constructors.jl")
-include("../src/border_construction.jl")
 
 @testset "Test suite for fit_oavi" begin
-  for oracle in ["CG", "BCG", "BPCG"]
+  for oracle in ["CG", "Away", "PCG", "Lazy", "BCG", "BPCG"]
     m, n = rand(15:25), rand(4:10)
     X_train = rand(m, n)
     for ihb in ["false", "weak", "full"]
-      X_train_transformed, sets = fit_oavi(X_train; oracle=oracle, inverse_hessian_boost=ihb)
+      X_train_transformed, sets = AVI.fit_oavi(X_train; oracle=oracle, inverse_hessian_boost=ihb)
       loss_list = Vector{Float64}([])
       for col in 1:size(sets.G_evaluations, 2)
         cur_col = sets.G_evaluations[:, col]
@@ -33,7 +28,7 @@ end;
   for _ in 1:5
     m, n = rand(15:25), rand(4:10)
     X_train = rand(m, n)
-    X_train_transformed, sets = fit_oavi(X_train; oracle="ABM", psi=0.05)
+    X_train_transformed, sets = AVI.fit_oavi(X_train; oracle="ABM", psi=0.05)
     loss_list = Vector{Float64}([])
     for col in 1:size(sets.G_evaluations, 2)
       cur_col = sets.G_evaluations[:, col]
@@ -49,15 +44,15 @@ end;
   for oracle in ["CG", "BPCG", "ABM"]
     m, n = rand(15:25), rand(4:10)
     X_tr = rand(m, n)
-    X_tr_transformed, sets_tr = fit_oavi(X_tr; oracle=oracle)
-    X_te_transformed, sets_te = evaluate_oavi(sets_tr, X_tr)
+    X_tr_transformed, sets_tr = AVI.fit_oavi(X_tr; oracle=oracle)
+    X_te_transformed, sets_te = AVI.evaluate_oavi(sets_tr, X_tr)
 
     @test all(X_tr_transformed .- X_te_transformed .<= 1.0e-10)
 
     if oracle !== "ABM"
         X_train = rand(10, 3)
-        X_train_transformed, sets_train = fit_oavi(X_train; psi=0.01, lambda=0.1, oracle=oracle)
-        X_test_transformed, sets_test = evaluate_oavi(sets_train, X_train)
+        X_train_transformed, sets_train = AVI.fit_oavi(X_train; psi=0.01, lambda=0.1, oracle=oracle)
+        X_test_transformed, sets_test = AVI.evaluate_oavi(sets_train, X_train)
         @test all(X_train_transformed .- X_test_transformed .<= 1.0e-10)
     end
   end
diff --git a/test/test_print_polynomials.jl b/test/test_print_polynomials.jl
@@ -1,11 +1,13 @@
 using Test
+using LinearAlgebra
+using ApproximateVanishingIdeals
+const AVI = ApproximateVanishingIdeals
 
-include("../src/print_polynomials.jl")
 
 @testset "Test suite for print_polynomials" begin
     A = rand(10, 3)
 
-    sets = construct_SetsOandG(A)
+    sets = AVI.construct_SetsOandG(A)
 
     append!(sets.G_coefficient_vectors, [reshape(   [   -0.77, 
                                                         0.44, 
@@ -39,7 +41,7 @@ include("../src/print_polynomials.jl")
                 "x_{3}^{2} - x_{3} - 0.03x_{1} + 0.17"
             ]
 
-    constructed_polys = print_polynomials(sets; ret=true)
+    constructed_polys = AVI.print_polynomials(sets; ret=true)
 
     @test all(polys .== constructed_polys)
 end;
@@ -55,6 +57,6 @@ end;
 
     for i in 1:size(terms, 2)
         term = terms[:, i]
-        @test convert_term_to_latex(term) == converted_terms[i]
+        @test AVI.convert_term_to_latex(term) == converted_terms[i]
     end
 end;
diff --git a/test/test_terms_and_polynomials.jl b/test/test_terms_and_polynomials.jl
diff --git a/test/test_terms_and_polynomials_vca.jl b/test/test_terms_and_polynomials_vca.jl
diff --git a/test/test_vca.jl b/test/test_vca.jl