From 632608675288ba62c2f80829beb03c1d47b87403 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Tue, 17 Jun 2025 11:10:29 +0200
Subject: [PATCH 01/22] Updated wolfe line search

---
 fdaPDE/src/optimization/wolfe_line_search.h | 95 +++++++++++++++------
 1 file changed, 68 insertions(+), 27 deletions(-)

diff --git a/fdaPDE/src/optimization/wolfe_line_search.h b/fdaPDE/src/optimization/wolfe_line_search.h
index 3821d0cf..444b550c 100644
--- a/fdaPDE/src/optimization/wolfe_line_search.h
+++ b/fdaPDE/src/optimization/wolfe_line_search.h
@@ -1,3 +1,4 @@
+
 // This file is part of fdaPDE, a C++ library for physics-informed
 // spatial and functional data analysis.
 //
@@ -14,8 +15,8 @@
 // You should have received a copy of the GNU General Public License
 // along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-#ifndef __FDAPDE_WOLFE_LINE_SEARCH_H__
-#define __FDAPDE_WOLFE_LINE_SEARCH_H__
+#ifndef __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
+#define __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
 
 #include "header_check.h"
 
@@ -23,47 +24,87 @@ namespace fdapde {
 
 // implementation of the Wolfe line search method for step selection
 // check "Jorge Nocedal, Stephen J. Wright (2006), Numerical Optimization"
-class WolfeLineSearch {
-   private:
+class WolfeLineSearchExp {
+private:
+    static constexpr int MAX_ITER = 10;
     double alpha_ = 1.0;
     double alpha_max_ = std::numeric_limits<double>::infinity(), alpha_min_ = 0;
     double c1_ = 1e-4, c2_ = 0.9;
-   public:
+
+public:
     // constructors
-    WolfeLineSearch() = default;
-    WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
+    WolfeLineSearchExp() = default;
+    WolfeLineSearchExp(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
     // bisection method for the weak Wolfe conditions
     template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
-        // restore to initial value
-        double alpha = alpha_;
-        double alpha_max = alpha_max_, alpha_min = alpha_min_;
+        // Cached values
+        double grad_0 = opt.grad_old.dot(opt.update);
+        double val_0 = obj(opt.x_old);
+        auto grad = obj.gradient();
+
+        // Variables
+        bool zooming = false;
         double c1 = c1_, c2 = c2_;
+        double alpha_max = alpha_max_, alpha_min = alpha_min_;
+        double alpha_prev = 0;
+        double alpha_curr = alpha_;
+        double val_prev = val_0;
+
+        for(int i = 0; i < MAX_ITER; ++i) {
+            double val_curr = obj(opt.x_old + alpha_curr * opt.update);
+            double grad_curr = grad(opt.x_old + alpha_curr * opt.update).dot(opt.update);
+
+            if(!zooming) {
+                if(val_curr > val_0 + c1*grad_0*alpha_curr || (i > 0 && val_curr >= val_prev)) {
+                    // found an upper bound, ready to "zoom"
+                    alpha_min = alpha_prev;
+                    alpha_max = alpha_curr;
+                    zooming = true;
+                    continue;
+                }
+
+                if(std::abs(grad_curr) <= -c2*grad_0) {
+                    // found our solution
+                    opt.h = alpha_curr;
+                    return false;
+                }
+
+                if( grad_curr >= 0 ) {
+                    // the function at alpha is increasing, we can zoom but with alpha_min and alpha_max reversed
+                    alpha_min = alpha_curr;
+                    alpha_max = alpha_prev;
-        // initialization
-        bool stop = false;
-        double m = opt.grad_old.dot(opt.update);
-        while (!stop) {
-            if (obj(opt.x_old) - obj(opt.x_old + alpha * opt.update)   // Armijo–Goldstein condition
-                  + c1 * alpha * m < 0) {
-                alpha_max = alpha;
-                alpha = (alpha_min + alpha_max) * 0.5;
+                    zooming = true;
+                    continue;
+                }
             } else {
-                double grad_new = obj.gradient()(opt.x_old + alpha * opt.update).dot(opt.update);
-                if (grad_new < c2 * m && std::abs(grad_new) > 1e-2) {   // curvature condition
-                    alpha_min = alpha;
-                    alpha = (std::isinf(alpha_max)) ? 2 * alpha_min : (alpha_min + alpha_max) * 0.5;
-            } else {
-                stop = true;
+                if( val_curr > val_0 + c1*alpha_curr*grad_0 || val_curr > obj(opt.x_old + alpha_max * opt.update)) {
+                    alpha_max = alpha_curr;
+                }
+                else {
+                    if( std::abs(grad_curr) <= -c2*grad_0 ) {
+                        opt.h = alpha_curr;
+                        return false;
+                    }
+                    if( grad_curr * (alpha_max - alpha_min) >= 0) {
+                        alpha_max = alpha_min;
+                    }
+                    alpha_min = alpha_curr;
                 }
             }
+
+            alpha_prev = alpha_curr;
+            val_prev = val_curr;
+
+            // We haven't yet found a proper upper bound: double alpha_curr while alpha_max is unbounded, otherwise bisect.
+            alpha_curr = std::isinf(alpha_max) ? 2*alpha_curr : (alpha_max + alpha_min)/2;
         }
-        opt.h = alpha;
+
         return false;
     }
 };
 
 }   // namespace fdapde
 
-#endif   // __FDAPDE_WOLFE_LINE_SEARCH_H__
-
+#endif   // __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
\ No newline at end of file

From 366f22ad67af30a14e0822273178787b5476191c Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Tue, 17 Jun 2025 11:11:37 +0200
Subject: [PATCH 02/22] fixed header guard

---
 fdaPDE/src/optimization/wolfe_line_search.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fdaPDE/src/optimization/wolfe_line_search.h b/fdaPDE/src/optimization/wolfe_line_search.h
index 444b550c..10d5882d 100644
--- a/fdaPDE/src/optimization/wolfe_line_search.h
+++ b/fdaPDE/src/optimization/wolfe_line_search.h
@@ -15,8 +15,8 @@
 // You should have received a copy of the GNU General Public License
 // along with this program. If not, see <http://www.gnu.org/licenses/>.
 
-#ifndef __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
-#define __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
+#ifndef __FDAPDE_WOLFE_LINE_SEARCH_H__
+#define __FDAPDE_WOLFE_LINE_SEARCH_H__
 
 #include "header_check.h"
 
@@ -107,4 +107,4 @@ class WolfeLineSearchExp {
 
 }   // namespace fdapde
 
-#endif   // __FDAPDE_WOLFE_LINE_SEARCH_EXP_H__
\ No newline at end of file
+#endif   // __FDAPDE_WOLFE_LINE_SEARCH_H__
\ No newline at end of file
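For reference, the weak Wolfe conditions that the rewritten search enforces (Nocedal & Wright, Numerical Optimization, Ch. 3) are, for a candidate step \alpha along the descent direction p_k:

    f(x_k + \alpha p_k) \le f(x_k) + c_1 \alpha \, \nabla f_k^\top p_k            (sufficient decrease)
    \lvert \nabla f(x_k + \alpha p_k)^\top p_k \rvert \le c_2 \, \lvert \nabla f_k^\top p_k \rvert   (curvature)

with 0 < c_1 < c_2 < 1 (the class defaults are c_1 = 1e-4, c_2 = 0.9). In the code, grad_0 stores \nabla f_k^\top p_k, which is negative for a descent direction, so the curvature test appears as std::abs(grad_curr) <= -c2*grad_0.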
From 7937e62fb8fa97133dff03cc13c2f01e867310ed Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Tue, 17 Jun 2025 11:15:15 +0200
Subject: [PATCH 03/22] fixed class name

---
 fdaPDE/src/optimization/wolfe_line_search.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fdaPDE/src/optimization/wolfe_line_search.h b/fdaPDE/src/optimization/wolfe_line_search.h
index 10d5882d..e58a6eb9 100644
--- a/fdaPDE/src/optimization/wolfe_line_search.h
+++ b/fdaPDE/src/optimization/wolfe_line_search.h
@@ -24,7 +24,7 @@ namespace fdapde {
 
 // implementation of the Wolfe line search method for step selection
 // check "Jorge Nocedal, Stephen J. Wright (2006), Numerical Optimization"
-class WolfeLineSearchExp {
+class WolfeLineSearch {
 private:
     static constexpr int MAX_ITER = 10;
     double alpha_ = 1.0;
@@ -33,8 +33,8 @@ class WolfeLineSearchExp {
 
 public:
     // constructors
-    WolfeLineSearchExp() = default;
-    WolfeLineSearchExp(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
+    WolfeLineSearch() = default;
+    WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
     // bisection method for the weak Wolfe conditions
     template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
From fd8445f3b11d36f76e19a1eaef3e7fa001b35337 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Wed, 18 Jun 2025 16:41:05 +0200
Subject: [PATCH 04/22] initial genetic optimizer impl

---
 fdaPDE/optimization.h                          |   8 +
 .../binary_tournament_selection.h              |  59 +++++++
 fdaPDE/src/optimization/crossover_mutation.h   |  40 +++++
 fdaPDE/src/optimization/gaussian_mutation.h    |  53 ++++++
 fdaPDE/src/optimization/genetic_optim.h        | 165 ++++++++++++++++++
 fdaPDE/src/optimization/rank_selection.h       |  40 +++++
 6 files changed, 365 insertions(+)
 create mode 100644 fdaPDE/src/optimization/binary_tournament_selection.h
 create mode 100644 fdaPDE/src/optimization/crossover_mutation.h
 create mode 100644 fdaPDE/src/optimization/gaussian_mutation.h
 create mode 100644 fdaPDE/src/optimization/genetic_optim.h
 create mode 100644 fdaPDE/src/optimization/rank_selection.h

diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h
index 53405917..bd48a3bd 100644
--- a/fdaPDE/optimization.h
+++ b/fdaPDE/optimization.h
@@ -28,12 +28,20 @@
 #include "src/optimization/callbacks.h"
 #include "src/optimization/backtracking_line_search.h"
 #include "src/optimization/wolfe_line_search.h"
+
+// Genetic algorithm "plug-ins"
+#include "src/optimization/binary_tournament_selection.h"
+#include "src/optimization/crossover_mutation.h"
+#include "src/optimization/gaussian_mutation.h"
+#include "src/optimization/rank_selection.h"
+
 // algorithms
 #include "src/optimization/grid.h"
 #include "src/optimization/newton.h"
 #include "src/optimization/gradient_descent.h"
 #include "src/optimization/bfgs.h"
 #include "src/optimization/lbfgs.h"
+#include "src/optimization/genetic_optim.h"
 
 // clang-format on

diff --git a/fdaPDE/src/optimization/binary_tournament_selection.h b/fdaPDE/src/optimization/binary_tournament_selection.h
new file mode 100644
index 00000000..56e071ce
--- /dev/null
+++ b/fdaPDE/src/optimization/binary_tournament_selection.h
@@ -0,0 +1,59 @@
+
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__
+#define __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+class BinaryTournamentSelection {
+private:
+    std::uniform_int_distribution<int> distribution_;
+
+public:
+    // constructors
+    BinaryTournamentSelection(int population_size = 10):
+        distribution_(0, population_size - 1)
+    {}
+
+    template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+        assert(distribution_.max() == opt.population.size() - 1);
+
+        // Perform binary tournament selection
+        for(int i = 0; i < opt.population.size(); i += 2) {
+            int id_first = distribution_(opt.rng);
+            int id_second = distribution_(opt.rng);
+            if(id_first == id_second)
+                continue;
+
+            double fit_first = opt.population_fitness[id_first];
+            double fit_second = opt.population_fitness[id_second];
+            if(fit_first <= fit_second) {
+                opt.population[id_second] = opt.population[id_first];
+            } else {
+                opt.population[id_first] = opt.population[id_second];
+            }
+        }
+        return false;
+    }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__
\ No newline at end of file

diff --git a/fdaPDE/src/optimization/crossover_mutation.h b/fdaPDE/src/optimization/crossover_mutation.h
new file mode 100644
index 00000000..1e611151
--- /dev/null
+++ b/fdaPDE/src/optimization/crossover_mutation.h
@@ -0,0 +1,40 @@
+
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_CROSSOVER_MUTATION_H__
+#define __FDAPDE_CROSSOVER_MUTATION_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+class CrossoverMutation {
+private:
+
+public:
+    // constructors
+    CrossoverMutation() = default;
+
+    template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+
+        return false;
+    }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_CROSSOVER_MUTATION_H__
\ No newline at end of file

diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h
new file mode 100644
index 00000000..3c6b7644
--- /dev/null
+++ b/fdaPDE/src/optimization/gaussian_mutation.h
@@ -0,0 +1,53 @@
+
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_GAUSSIAN_MUTATION_H__
+#define __FDAPDE_GAUSSIAN_MUTATION_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+class GaussianMutation {
+private:
+    double variance_ = 2.5;
+    double multiplier_ = 0.95;
+    std::normal_distribution<double> normal_dist_;
+
+public:
+    // constructors
+    GaussianMutation(double initial_variance, double multiplier):
+        variance_(initial_variance),
+        multiplier_(multiplier),
+        normal_dist_(0.0, 1.0)
+    {}
+
+    template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+        // Perform Gaussian mutation on every individual
+        for(auto &vec: opt.population) {
+            for(int i = 0; i < vec.rows(); ++i)
+                vec[i] += normal_dist_(opt.rng) * variance_;
+        }
+
+        variance_ *= multiplier_;
+        return false;
+    }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_GAUSSIAN_MUTATION_H__
\ No newline at end of file

diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h
new file mode 100644
index 00000000..cbb8baed
--- /dev/null
+++ b/fdaPDE/src/optimization/genetic_optim.h
@@ -0,0 +1,165 @@
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_GENETIC_OPTIM_H__
+#define __FDAPDE_GENETIC_OPTIM_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+template <int N, typename... Args> class GeneticOptim {
+private:
+    using vector_t = std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, 1>, Eigen::Matrix<double, N, 1>>;
+    using matrix_t = std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, Dynamic>, Eigen::Matrix<double, N, N>>;
+
+    std::tuple<Args...> callbacks_;
+    vector_t optimum_;
+    double value_;          // objective value at optimum
+    int max_iter_;          // maximum number of iterations before forced stop
+    int n_iter_ = 0;        // current iteration number
+    double tol_;            // tolerance on error before forced stop
+    double variance_;       // update step
+    int population_size_;   // The size of any given generation
+    unsigned seed_;
+
+public:
+    std::vector<vector_t> population;
+    std::vector<double> population_fitness;
+    std::mt19937 rng;
+
+public:
+    // constructors
+    GeneticOptim() = default;
+
+    // GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed = 0)
+    //     requires(sizeof...(Args) != 0):
+    //     seed_(seed),
+    //     max_iter_(max_iter),
+    //     population_size_(population_size),
+    //     population(population_size, vector_t{}),
+    //     population_fitness(population_size, std::numeric_limits<double>::max()),
+    //     tol_(tol), variance_(variance),
+    //     rng(seed) {
+    //     assert(population_size_ >= 0);
+    // }
+
+    GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed, Args&&... callbacks)
+        requires(sizeof...(Args) != 0):
+        callbacks_(std::make_tuple(std::forward<Args>(callbacks)...)),
+        max_iter_(max_iter),
+        population_size_(population_size),
+        population(population_size, vector_t{}),
+        population_fitness(population_size, std::numeric_limits<double>::max()),
+        tol_(tol), variance_(variance),
+        rng(seed) {
+        assert(population_size_ >= 0);
+    }
+
+    // copy semantic
+    GeneticOptim(const GeneticOptim& other) :
+        callbacks_(other.callbacks_),
+        population_size_(other.population_size_),
+        population_fitness(other.population_fitness),
+        max_iter_(other.max_iter_),
+        seed_(other.seed_),
+        tol_(other.tol_),
+        population(other.population),
+        variance_(other.variance_){
+    }
+
+    GeneticOptim& operator=(const GeneticOptim& other) {
+        max_iter_ = other.max_iter_;
+        tol_ = other.tol_;
+        population_fitness = other.population_fitness;
+        variance_ = other.variance_;
+        population_size_ = other.population_size_;
+        callbacks_ = other.callbacks_;
+        seed_ = other.seed_;
+        population = other.population;
+        return *this;
+    }
+
+    template <typename ObjectiveT, typename... Functor>
+        requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...)
+    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) {
+        fdapde_static_assert(
+          std::is_same<decltype(std::declval<ObjectiveT>().operator()(vector_t())) FDAPDE_COMMA double>::value,
+          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE);
+
+        // whether or not the callbacks' stopping criteria are met
+        bool stop = false;
+        value_ = std::numeric_limits<double>::max();
+        n_iter_ = 0;
+
+        // Initialize the state
+        for(int i = 0; i < population_size_; ++i) {
+            population[i] = x0;
+        }
+
+        // In the case of this algorithm, post_update... is the mutation process
+        // so we apply it before the main loop
+        stop |= execute_post_update_step(*this, objective, callbacks_);
+
+        while (n_iter_ < max_iter_ && !stop) {
+            // Selection
+            for(int i = 0; i < population_size_; ++i)
+                population_fitness[i] = objective(population[i]);
+
+            stop |= execute_pre_update_step(*this, objective, callbacks_);
+            stop |= execute_post_update_step(*this, objective, callbacks_);
+
+            // Update the current variance
+            int current_best = 0;
+            for(int i = 1; i < population_size_; ++i)
+                if(population_fitness[i] < population_fitness[current_best])
+                    current_best = i;
+
+            stop |= execute_stopping_criterion(*this, objective);
+
+            ++n_iter_;
+            value_ = population_fitness[current_best];
+            optimum_ = population[current_best];
+        }
+
+        return optimum_;
+    }
+
+    /**
+     * @brief Returns the argmin of the last call to [optimize]
+     *
+     * @return vector_t
+     */
+    vector_t optimum() const { return optimum_; }
+
+    /**
+     * @brief Returns the minimum of the last call to [optimize]
+     *
+     * @return double
+     */
+    double value() const { return value_; }
+
+    /**
+     * @brief Returns the number of iterations performed by the method on the last call to [optimize]
+     *
+     * @return int
+     */
+    int n_iter() const { return n_iter_; }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_GENETIC_OPTIM_H__

diff --git a/fdaPDE/src/optimization/rank_selection.h b/fdaPDE/src/optimization/rank_selection.h
new file mode 100644
index 00000000..fcf88cf2
--- /dev/null
+++ b/fdaPDE/src/optimization/rank_selection.h
@@ -0,0 +1,40 @@
+
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_RANK_SELECTION_H__
+#define __FDAPDE_RANK_SELECTION_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+class RankSelection {
+private:
+
+public:
+    // constructors
+    RankSelection() = default;
+
+    template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+
+        return false;
+    }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_RANK_SELECTION_H__
\ No newline at end of file
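The genetic components above plug into GeneticOptim through the same generic callback protocol already used by the line searches: any object exposing pre_update_step / post_update_step taking (Opt&, Obj&) and returning a stop flag can be passed, and it may freely read and modify the optimizer's public population, population_fitness and rng members. A minimal sketch of a custom plug-in built on that protocol (the Elitism class and its behavior are hypothetical, not part of the patch series):

    // Sketch of a custom GeneticOptim plug-in: elitism that re-injects the best
    // individual after mutation. Relies only on the public protocol above.
    struct Elitism {
        template <typename Opt, typename Obj> bool post_update_step(Opt& opt, Obj& obj) {
            int best = 0;
            for (int i = 1; i < (int)opt.population.size(); ++i)
                if (opt.population_fitness[i] < opt.population_fitness[best]) best = i;
            opt.population[0] = opt.population[best];   // keep the best individual alive
            return false;                               // never requests an early stop
        }
    };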
From 89b16d438d0a767145846008599e1b2a2b281530 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Wed, 18 Jun 2025 20:20:09 +0200
Subject: [PATCH 05/22] Fixed GeneticOptim class bug where state would not
 reset properly from one optim to another

---
 .../binary_tournament_selection.h           |  7 ++-
 fdaPDE/src/optimization/callbacks.h         | 12 +++++
 fdaPDE/src/optimization/gaussian_mutation.h |  8 ++-
 fdaPDE/src/optimization/genetic_optim.h     | 53 ++++++++++++++-----
 fdaPDE/src/optimization/rank_selection.h    | 24 ++++++++-
 5 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/fdaPDE/src/optimization/binary_tournament_selection.h b/fdaPDE/src/optimization/binary_tournament_selection.h
index 56e071ce..dd1f369e 100644
--- a/fdaPDE/src/optimization/binary_tournament_selection.h
+++ b/fdaPDE/src/optimization/binary_tournament_selection.h
@@ -32,11 +32,16 @@ class BinaryTournamentSelection {
         distribution_(0, population_size - 1)
     {}
 
+    template <typename Opt> void reset_step(Opt& opt) {
+        assert(opt.population.size() >= 2);
+        distribution_ = std::uniform_int_distribution<int>{0, static_cast<int>(opt.population.size()) - 1};
+    }
+
     template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
         assert(distribution_.max() == opt.population.size() - 1);
 
         // Perform binary tournament selection
-        for(int i = 0; i < opt.population.size(); i += 2) {
+        for(int i = 0; i < opt.population.size(); i += 1) {
             int id_first = distribution_(opt.rng);
             int id_second = distribution_(opt.rng);

diff --git a/fdaPDE/src/optimization/callbacks.h b/fdaPDE/src/optimization/callbacks.h
index 9f67f85f..280a1710 100644
--- a/fdaPDE/src/optimization/callbacks.h
+++ b/fdaPDE/src/optimization/callbacks.h
@@ -59,6 +59,18 @@ template <typename Opt, typename Obj> bool execute_stopping_criterion(Opt& optim
     return b;
 }
 
+template <typename Opt, typename... Args>
+void execute_reset_step(Opt& optimizer, std::tuple<Args...>& callbacks) {
+    auto exec_callback = [&](auto&& callback) {
+        if constexpr (requires(std::decay_t<decltype(callback)> c, Opt opt) {
+                          { c.reset_step(opt) } -> std::same_as<void>;
+                      }) {
+            callback.reset_step(optimizer);
+        }
+    };
+    std::apply([&](auto&&... callback) { (exec_callback(callback), ...); }, callbacks);
+}
+
 }   // namespace fdapde
 
 #endif   // __FDAPDE_OPTIMIZATION_CALLBACKS_H__

diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h
index 3c6b7644..9c215b90 100644
--- a/fdaPDE/src/optimization/gaussian_mutation.h
+++ b/fdaPDE/src/optimization/gaussian_mutation.h
@@ -24,6 +24,7 @@ namespace fdapde {
 
 class GaussianMutation {
 private:
+    double initial_variance_ = 2.5;
     double variance_ = 2.5;
     double multiplier_ = 0.95;
     std::normal_distribution<double> normal_dist_;
@@ -31,12 +32,17 @@ class GaussianMutation {
 public:
     // constructors
     GaussianMutation(double initial_variance, double multiplier):
+        initial_variance_(initial_variance),
         variance_(initial_variance),
         multiplier_(multiplier),
         normal_dist_(0.0, 1.0)
     {}
 
-    template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+    template <typename Opt> void reset_step(Opt& opt) {
+        variance_ = initial_variance_;
+    }
+
+    template <typename Opt, typename Obj> bool post_update_step(Opt& opt, Obj& obj) {
         // Perform Gaussian mutation on every individual
         for(auto &vec: opt.population) {
             for(int i = 0; i < vec.rows(); ++i)

diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h
index cbb8baed..e5bb15d5 100644
--- a/fdaPDE/src/optimization/genetic_optim.h
+++ b/fdaPDE/src/optimization/genetic_optim.h
@@ -28,13 +28,13 @@ template <int N, typename... Args> class GeneticOptim {
 
     std::tuple<Args...> callbacks_;
     vector_t optimum_;
-    double value_;          // objective value at optimum
-    int max_iter_;          // maximum number of iterations before forced stop
-    int n_iter_ = 0;        // current iteration number
-    double tol_;            // tolerance on error before forced stop
-    double variance_;       // update step
-    int population_size_;   // The size of any given generation
-    unsigned seed_;
+    double value_;           // objective value at optimum
+    int max_iter_;           // maximum number of iterations before forced stop
+    int n_iter_ = 0;         // current iteration number
+    double tol_;             // tolerance on error before forced stop
+    double variance_;        // update step
+    int population_size_;    // The size of any given generation
+    unsigned seed_;          // Seed for the RNG
 
 public:
     std::vector<vector_t> population;
@@ -102,6 +102,7 @@ template <int N, typename... Args> class GeneticOptim {
 
         // whether or not the callbacks' stopping criteria are met
         bool stop = false;
+        double fitness_mean = 0.0;
         value_ = std::numeric_limits<double>::max();
         n_iter_ = 0;
 
@@ -110,26 +111,50 @@ template <int N, typename... Args> class GeneticOptim {
             population[i] = x0;
         }
 
+        // Reset the state of all callbacks and sync their state with the
+        // parameters in GeneticOptim
+        execute_reset_step(*this, callbacks_);
+
         // In the case of this algorithm, post_update... is the mutation process
         // so we apply it before the main loop
         stop |= execute_post_update_step(*this, objective, callbacks_);
 
         while (n_iter_ < max_iter_ && !stop) {
-            // Selection
-            for(int i = 0; i < population_size_; ++i)
+            // Compute the fitness for selection
+            for(int i = 0; i < population_size_; ++i) {
                 population_fitness[i] = objective(population[i]);
+            }
 
+            // pre_update_step should implement a selection algorithm
             stop |= execute_pre_update_step(*this, objective, callbacks_);
+
+            // post_update_step should implement a mutation algorithm
             stop |= execute_post_update_step(*this, objective, callbacks_);
 
-            // Update the current variance
+            // Compute the mean of the population fitness
+            fitness_mean = 0.0;
+            for(int i = 0; i < population_size_; ++i) {
+                fitness_mean += population_fitness[i];
+            }
+            fitness_mean /= static_cast<double>(population_size_);
+
+            // Compute the variance and max of the population fitness
             int current_best = 0;
-            for(int i = 1; i < population_size_; ++i)
-                if(population_fitness[i] < population_fitness[current_best])
-                    current_best = i;
-
+            double variance = 0.0;
+            for(int i = 1; i < population_size_; ++i) {
+                double x = population_fitness[i] - fitness_mean;
+                variance += x*x;
+                if(population_fitness[i] < population_fitness[current_best])
+                    current_best = i;
+            }
+            variance /= static_cast<double>(population_size_-1);
+            // std::cout << "Std dev at " << n_iter_ << " : " << std::sqrt(variance) << std::endl;
+
+            // Stopping condition
+            stop |= (std::sqrt(variance) <= tol_);
             stop |= execute_stopping_criterion(*this, objective);
 
+            // Update
             ++n_iter_;
             value_ = population_fitness[current_best];
             optimum_ = population[current_best];
         }

diff --git a/fdaPDE/src/optimization/rank_selection.h b/fdaPDE/src/optimization/rank_selection.h
index fcf88cf2..d440598a 100644
--- a/fdaPDE/src/optimization/rank_selection.h
+++ b/fdaPDE/src/optimization/rank_selection.h
@@ -24,13 +24,33 @@ namespace fdapde {
 
 class RankSelection {
 private:
+    std::vector<int> population_order_;
 
 public:
     // constructors
-    RankSelection() = default;
+    RankSelection(int population_size = 10) {
+        population_order_.reserve(population_size);
+        for(int i = 0; i < population_size; ++i)
+            population_order_.push_back(i);
+    }
+
+    template <typename Opt> void reset_step(Opt& opt) {
+        population_order_.clear();
+        population_order_.reserve(opt.population.size());
+        for(int i = 0; i < opt.population.size(); ++i)
+            population_order_.push_back(i);
+    }
 
     template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
+        std::sort(
+            population_order_.begin(),
+            population_order_.end(),
+            [&](int a, int b) {
+                return opt.population_fitness[a] > opt.population_fitness[b];
+            }
+        );
 
         return false;
     }
 };
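The execute_reset_step helper added above only invokes reset_step on the callbacks that actually provide it, by testing a requires-expression inside if constexpr. The same compile-time detection idiom in isolation (a self-contained sketch; the names reset_all, WithReset and WithoutReset are illustrative):

    #include <concepts>
    #include <tuple>
    #include <utility>

    struct WithReset    { template <typename Opt> void reset_step(Opt&) { /* re-seed internal state */ } };
    struct WithoutReset { /* no reset_step: silently skipped */ };

    template <typename Opt, typename... Cs>
    void reset_all(Opt& opt, std::tuple<Cs...>& cs) {
        auto exec = [&](auto&& c) {
            // detected at compile time: only callbacks exposing void reset_step(Opt&) are called
            if constexpr (requires(std::decay_t<decltype(c)> cb, Opt& o) {
                              { cb.reset_step(o) } -> std::same_as<void>;
                          }) {
                c.reset_step(opt);
            }
        };
        std::apply([&](auto&&... c) { (exec(c), ...); }, cs);
    }

Callbacks without a reset_step compile and run unchanged, which is what lets the pre-existing line-search callbacks coexist with the new genetic plug-ins.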
From d0780a46751e1adba19dfca7752ad9b40303a601 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Thu, 19 Jun 2025 15:35:05 +0200
Subject: [PATCH 06/22] Added rank selection

---
 fdaPDE/src/optimization/rank_selection.h | 64 +++++++++++++++++++++---
 1 file changed, 57 insertions(+), 7 deletions(-)

diff --git a/fdaPDE/src/optimization/rank_selection.h b/fdaPDE/src/optimization/rank_selection.h
index d440598a..645d60d7 100644
--- a/fdaPDE/src/optimization/rank_selection.h
+++ b/fdaPDE/src/optimization/rank_selection.h
@@ -22,23 +22,67 @@ namespace fdapde {
 
+template <typename VectorT>
 class RankSelection {
 private:
     std::vector<int> population_order_;
+    std::uniform_real_distribution<> distribution_{0.0, 1.0};
+    std::vector<double> cdf_;
+    std::vector<VectorT> new_population_;
+
+    /**
+     * @brief Given a random number between 0 and 1, returns the index of the selected rank, where rank i is chosen with probability \pi_i = (amax - (amax-amin)*(i-1)/(m-1))/m
+     *
+     * @param sample random uniform sample between 0 and 1
+     * @return int the rank associated with the sample
+     */
+    int index_of_sample(double sample) {
+        int upper_bound = cdf_.size() - 1;
+        int lower_bound = 0;
+
+        // Handle edge cases
+        if (sample <= cdf_[0]) {
+            return 0;
+        }
+        if (sample > cdf_[upper_bound]) {
+            return upper_bound;
+        }
+
+        // Binary search to find the appropriate interval
+        while (lower_bound < upper_bound) {
+            int middle = lower_bound + (upper_bound - lower_bound) / 2;
+            if (cdf_[middle] < sample) {
+                lower_bound = middle + 1;
+            } else {
+                upper_bound = middle;
+            }
+        }
+
+        return upper_bound;
+    }
 
 public:
     // constructors
-    RankSelection(int population_size = 10) {
-        population_order_.reserve(population_size);
-        for(int i = 0; i < population_size; ++i)
-            population_order_.push_back(i);
-    }
+    RankSelection() = default;
 
     template <typename Opt> void reset_step(Opt& opt) {
         population_order_.clear();
+        cdf_.clear();
+        new_population_.clear();
+
         population_order_.reserve(opt.population.size());
-        for(int i = 0; i < opt.population.size(); ++i)
+        new_population_.reserve(opt.population.size());
+        cdf_.reserve(opt.population.size());
+
+        constexpr double amax = 1.2;
+        constexpr double amin = 2.0 - amax;
+        double probability_sum = 0.0;
+        for(int i = 0; i < opt.population.size(); ++i) {
+            // i is the 0-based rank, i.e. (rank - 1) in the formula above
+            double pi_i = (amax - (double)(amax - amin)*i/(double)(opt.population.size()-1))/(double)(opt.population.size());
+            probability_sum += pi_i;
+            cdf_.push_back(probability_sum);
             population_order_.push_back(i);
+        }
     }
 
     template <typename Opt, typename Obj> bool pre_update_step(Opt& opt, Obj& obj) {
         std::sort(
             population_order_.begin(),
             population_order_.end(),
             [&](int a, int b) {
-                return opt.population_fitness[a] > opt.population_fitness[b];
+                return opt.population_fitness[a] < opt.population_fitness[b];
             }
         );
 
+        new_population_.clear();
+        for(int i = 0; i < opt.population.size(); ++i) {
+            int selected = index_of_sample(distribution_(opt.rng));
+            new_population_.push_back(opt.population[population_order_[selected]]);
+        }
+        opt.population = new_population_;
 
         return false;
     }
 };

From cf0423b466588c229003c38375dab6ef1869d072 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Fri, 20 Jun 2025 15:31:42 +0200
Subject: [PATCH 07/22] fixed assert type

---
 fdaPDE/src/optimization/lbfgs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fdaPDE/src/optimization/lbfgs.h b/fdaPDE/src/optimization/lbfgs.h
index 46945b23..3866ad33 100644
--- a/fdaPDE/src/optimization/lbfgs.h
+++ b/fdaPDE/src/optimization/lbfgs.h
@@ -102,7 +102,7 @@ template <int N, typename... Args> class LBFGS {
         delta_grad_memory_(memory_size, vector_t{}),
         delta_x_memory_(memory_size, vector_t{}),
         tol_(tol), step_(step){
-        assert(memory_size_ >= 0);
+        fdapde_assert(memory_size_ >= 0);
     }
 
     /**
@@ -121,7 +121,7 @@ template <int N, typename... Args> class LBFGS {
         delta_grad_memory_(memory_size, vector_t{}),
         delta_x_memory_(memory_size, vector_t{}),
         tol_(tol), step_(step){
-        assert(memory_size_ >= 0);
+        fdapde_assert(memory_size_ >= 0);
     }
 
     // copy semantic
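The rank-selection weights introduced in patch 06 follow the standard linear-ranking scheme: with m individuals, selection pressure a_max (1.2 in the code) and a_min = 2 - a_max, the individual of rank i (1 = best after the ascending fitness sort) is drawn with probability

    \pi_i = \frac{1}{m}\Big(a_{\max} - (a_{\max} - a_{\min})\,\frac{i-1}{m-1}\Big), \qquad \sum_{i=1}^{m} \pi_i = \frac{a_{\max} + a_{\min}}{2} = 1,

so the cumulative sums stored in cdf_ end at 1 and inverse-transform sampling via the binary search in index_of_sample is well defined. (This identity also motivates using the 0-based rank i, rather than i-1, in the 0-indexed loop of reset_step.)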
From 66eb65863c22b05bd2910f9cec187b7d00926887 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Fri, 20 Jun 2025 15:38:58 +0200
Subject: [PATCH 08/22] NelderMead initial commit (wip)

---
 fdaPDE/optimization.h                 |   2 +
 fdaPDE/src/optimization/nelder_mead.h | 228 ++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 fdaPDE/src/optimization/nelder_mead.h

diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h
index 53405917..249d2b56 100644
--- a/fdaPDE/optimization.h
+++ b/fdaPDE/optimization.h
@@ -28,12 +28,14 @@
 #include "src/optimization/callbacks.h"
 #include "src/optimization/backtracking_line_search.h"
 #include "src/optimization/wolfe_line_search.h"
+
 // algorithms
 #include "src/optimization/grid.h"
 #include "src/optimization/newton.h"
 #include "src/optimization/gradient_descent.h"
 #include "src/optimization/bfgs.h"
 #include "src/optimization/lbfgs.h"
+#include "src/optimization/nelder_mead.h"
 
 // clang-format on

diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h
new file mode 100644
index 00000000..475a7e11
--- /dev/null
+++ b/fdaPDE/src/optimization/nelder_mead.h
@@ -0,0 +1,228 @@
+// This file is part of fdaPDE, a C++ library for physics-informed
+// spatial and functional data analysis.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef __FDAPDE_NELDER_MEAD_H__
+#define __FDAPDE_NELDER_MEAD_H__
+
+#include "header_check.h"
+
+namespace fdapde {
+
+/**
+ * @brief Implementation of a perturbed version of the Nelder–Mead algorithm as analysed in [Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for high-dimensional function optimization. Optim Lett 13, 1011–1025 (2019)]
+ *
+ * @tparam N dimension of the problem
+ */
+template <int N> class NelderMead {
+private:
+    using vector_t = std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, 1>, Eigen::Matrix<double, N, 1>>;
+    using matrix_t = std::conditional_t<N == Dynamic, Eigen::Matrix<double, Dynamic, Dynamic>, Eigen::Matrix<double, N, N>>;
+
+    vector_t optimum_;   // Argmin of the objective
+    double value_;       // objective value at optimum
+    int max_iter_;       // maximum number of iterations before forced stop
+    int n_iter_ = 0;     // current iteration number
+    double tol_;         // tolerance on error before forced stop
+
+    std::vector<vector_t> simplex_;         // Vertices of the simplex
+    std::vector<double> vertices_values_;   // Value of each vertex
+    std::vector<int> vertices_rank_;        // Index into the simplex vector, sorted from best to worst
+
+    double alpha_ = 1.0;   // Reflection coeff
+    double beta_ = 2.0;    // Expansion coeff
+    double gamma_ = 0.5;   // Outer contraction coeff
+    double delta_ = 0.5;   // Inner contraction coeff
+
+    void init_simplex_(const vector_t &x0) {
+        // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search.
+        // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4
+
+
+        const int dimension = x0.rows();
+
+        // Initialises the vectors with cached values
+        vertices_rank_.resize(dimension+1, 0);
+        vertices_values_.resize(dimension+1, std::numeric_limits<double>::max());
+
+        for(int i = 0; i < dimension+1; ++i) {
+            // vertices_rank_[i] = ;
+        }
+    }
+
+public:
+
+    // constructors
+    NelderMead() = default;
+
+    /**
+     * @brief Construct a new NelderMead instance
+     *
+     * @param max_iter maximum number of iterations performed
+     * @param tol tolerance used as the stopping criterion
+     */
+    NelderMead(int max_iter, double tol)
+        requires(sizeof...(Args) != 0):
+        max_iter_(max_iter),
+        memory_size_(memory_size),
+        tol_(tol){
+        fdapde_assert(memory_size_ >= 0);
+    }
+
+    /**
+     * @brief Minimizes a function using the Nelder–Mead method
+     *
+     * @tparam ObjectiveT Type of the function to be minimized
+     * @param objective function to be minimized
+     * @param x0 initial point
+     * @param func
+     */
+    template <typename ObjectiveT, typename... Functor>
+        requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...)
+    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) {
+        fdapde_static_assert(
+          std::is_same<decltype(std::declval<ObjectiveT>().operator()(vector_t())) FDAPDE_COMMA double>::value,
+          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE);
+
+        bool done = false;
+        vector_t zero;
+        const int dimension = x0.rows();
+        fdapde_assert(dimension >= 1);
+        if constexpr (N == Dynamic) {   // inv_hessian approximated with identity matrix
+            zero = vector_t::Zero(x0.rows());
+        } else {
+            zero = vector_t::Zero();
+        }
+
+        // Initialise the simplex given x0
+        init_simplex_(x0);
+
+        // Compute the vertices' values
+        for(int i = 0; i < simplex_.size(); ++i)
+            vertices_values[i] = objective(simplex_[i]);
+
+        while(!done) {
+            // Sort the vertices according to their objective value
+            std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
+                return vertices_values_[a] < vertices_values_[b];
+            });
+
+            // Centroid calculation
+
+            // Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for high-dimensional function optimization.
+            // Optim Lett 13, 1011–1025 (2019). https://doi.org/10.1007/s11590-018-1306-2
+            vector_t centroid = zero;   // centroid of the [dimension] best vertices
+            for(int i = 0; i < dimension; ++i)
+                centroid += simplex_[vertices_rank_[i]];
+            centroid /= (double)(dimension);
+
+            vector_t xr = centroid + alpha_*(centroid - simplex_[vertices_rank_[dimension-1]]);
+
+            // Cache the values used for the if statements
+            const double xr_value = objective(xr);
+            const double best_value = vertices_values_[vertices_rank_[0]];
+            const double worst_value = vertices_values_[vertices_rank_[dimension]];
+            const double second_worst_value = vertices_values_[vertices_rank_[dimension-1]];
+
+            // Compute the new simplex
+            if( best_value <= xr_value && xr_value < second_worst_value ) {
+                // Reflection
+                simplex_[vertices_rank_[dimension]] = xr;
+                vertices_values_[vertices_rank_[dimension]] = xr_value;
+            } else if( xr_value < best_value ) {
+                // Expansion
+                vector_t xe = centroid + beta_*(xr - centroid);
+                double xe_val = objective(xe);
+                if(xe_vel < xr_value) {
+                    simplex_[vertices_rank_[dimension]] = xe;
+                    vertices_values_[vertices_rank_[dimension]] = xe_value;
+                } else {
+                    simplex_[vertices_rank_[dimension]] = xr;
+                    vertices_values_[vertices_rank_[dimension]] = xr_value;
+                }
+            } else if( second_worst_value <= xr_value && xr_value < worst_value ) {
+                // Outer Contraction
+                vector_t xoc = c + gamma_ * (xr - c);
+                double xoc_val = objective(xoc);
+                if (xoc_val ≤ xr_val) {
+                    simplex_[vertices_rank_[dimension]] = xoc;
+                    vertices_values_[vertices_rank_[dimension]] = xoc_value;
+                }
+            } else {
+                // Inner Contraction
+                vector_t xic = c - delta_ * (xr - c);
+                double xic_val = objective(xic);
+                if( xic_val < best_val) {
+                    simplex_[vertices_rank_[dimension]] = xic;
+                    vertices_values_[vertices_rank_[dimension]] = xic_value;
+                }
+            }
+
+            // Shrink
+            const vector_t &best_vertex = simplex_[vertices_rank_[0]];
+            for(int i = 1; i < dimension + 1; ++i) {
+                simplex_[vertices_rank_[i]] = best_vertex + delta_ * (simplex_[vertices_rank_[i]] - best_vertex);
+            }
+
+            // Stopping criterion
+            double value_mean = 0.0;
+            for(double val: vertices_values_) {
+                value_mean += val;
+            }
+            value_mean /= (double)simplex_.size();
+
+            double variance = 0.0;
+            for(double val: vertices_values_) {
+                double x = val - value_mean;
+                variance += x*x;
+            }
+            variance /= (double)(dimension);
+
+            if(variance <= tol_) {
+                done = true;
+            }
+        }
+
+        optimum_ = simplex_[0];
+        value_ = objective(optimum_);
+        return optimum_;
+    }
+
+    /**
+     * @brief Returns the argmin of the last call to [optimize]
+     *
+     * @return vector_t
+     */
+    vector_t optimum() const { return optimum_; }
+
+    /**
+     * @brief Returns the minimum of the last call to [optimize]
+     *
+     * @return double
+     */
+    double value() const { return value_; }
+
+    /**
+     * @brief Returns the number of iterations performed by the method on the last call to [optimize]
+     *
+     * @return int
+     */
+    int n_iter() const { return n_iter_; }
+};
+
+} // namespace fdapde
+
+#endif // __FDAPDE_NELDER_MEAD_H__
\ No newline at end of file

From 2b7746409579d2072fb29267ed0b69d858adff30 Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Fri, 20 Jun 2025 19:06:53 +0200
Subject: [PATCH 09/22] Added initial condition, minor fixes, still wip

---
 fdaPDE/src/optimization/nelder_mead.h | 87 ++++++++++++++++-----------
 1 file changed, 53 insertions(+), 34 deletions(-)

diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h
index 475a7e11..6fb47279 100644
--- a/fdaPDE/src/optimization/nelder_mead.h
+++ b/fdaPDE/src/optimization/nelder_mead.h
@@ -37,6 +37,9 @@ template <int N> class NelderMead {
     int n_iter_ = 0;     // current iteration number
     double tol_;         // tolerance on error before forced stop
 
+    std::normal_distribution<double> normal_dist_ {0.0, 1.0};
+    std::mt19937 rng_;
+
     std::vector<vector_t> simplex_;         // Vertices of the simplex
     std::vector<double> vertices_values_;   // Value of each vertex
     std::vector<int> vertices_rank_;        // Index into the simplex vector, sorted from best to worst
@@ -47,18 +50,17 @@ template <int N> class NelderMead {
     double delta_ = 0.5;   // Inner contraction coeff
 
     void init_simplex_(const vector_t &x0) {
-        // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search.
-        // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4
-
-
         const int dimension = x0.rows();
 
         // Initialises the vectors with cached values
         vertices_rank_.resize(dimension+1, 0);
         vertices_values_.resize(dimension+1, std::numeric_limits<double>::max());
+        simplex_.resize(dimension+1, x0);
 
-        for(int i = 0; i < dimension+1; ++i) {
-            // vertices_rank_[i] = ;
+        for(int i = 0; i < dimension; ++i) {
+            double tho = std::abs(simplex_[i+1][i]) < tol_ ? 0.00025: 0.05;
+            simplex_[i+1][i] += tho;
+            vertices_rank_[i+1] = i+1;
         }
     }
 
@@ -74,12 +76,9 @@ template <int N> class NelderMead {
      * @brief Construct a new NelderMead instance
      *
      * @param max_iter maximum number of iterations performed
     * @param tol tolerance used as the stopping criterion
      */
-    NelderMead(int max_iter, double tol)
-        requires(sizeof...(Args) != 0):
+    NelderMead(int max_iter, double tol):
         max_iter_(max_iter),
-        memory_size_(memory_size),
         tol_(tol){
-        fdapde_assert(memory_size_ >= 0);
     }
 
@@ -98,18 +97,31 @@ template <int N> class NelderMead {
         // Initialise the simplex given x0
         init_simplex_(x0);
 
         // Compute the vertices' values
         for(int i = 0; i < simplex_.size(); ++i)
-            vertices_values[i] = objective(simplex_[i]);
+            vertices_values_[i] = objective(simplex_[i]);
 
-        while(!done) {
+        while(!done && n_iter_ < max_iter_) {
             // Sort the vertices according to their objective value
             std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
                 return vertices_values_[a] < vertices_values_[b];
             });
 
             // Centroid calculation
-
-            // Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for high-dimensional function optimization.
-            // Optim Lett 13, 1011–1025 (2019). https://doi.org/10.1007/s11590-018-1306-2
             vector_t centroid = zero;   // centroid of the [dimension] best vertices
+            vector_t random_vect = zero;
             for(int i = 0; i < dimension; ++i) {
                 centroid += simplex_[vertices_rank_[i]];
+                random_vect[i] = normal_dist_(rng_);
             }
             centroid /= (double)(dimension);
+            random_vect /= random_vect.norm();
 
-            vector_t xr = centroid + alpha_*(centroid - simplex_[vertices_rank_[dimension-1]]);
+            // Perturbation of the centroid to enhance performance for large dimensions
+            // Optim Lett 13, 1011–1025 (2019). https://doi.org/10.1007/s11590-018-1306-2
+            vector_t perturbed_centroid = centroid + 0.1 * random_vect * (simplex_[vertices_rank_[0]] - simplex_[vertices_rank_[dimension]]).norm();
+            vector_t xr = perturbed_centroid + alpha_*(perturbed_centroid - simplex_[vertices_rank_[dimension]]);
 
             // Cache the values used for the if statements
-            const double xr_value = objective(xr);
-            const double best_value = vertices_values_[vertices_rank_[0]];
-            const double worst_value = vertices_values_[vertices_rank_[dimension]];
-            const double second_worst_value = vertices_values_[vertices_rank_[dimension-1]];
+            const double xr_val = objective(xr);
+            const double best_val = vertices_values_[vertices_rank_[0]];
+            const double worst_val = vertices_values_[vertices_rank_[dimension]];
+            const double second_worst_val = vertices_values_[vertices_rank_[dimension-1]];
 
             // Compute the new simplex
-            if( best_value <= xr_value && xr_value < second_worst_value ) {
+            if( best_val <= xr_val && xr_val < second_worst_val ) {
                 // Reflection
                 simplex_[vertices_rank_[dimension]] = xr;
-                vertices_values_[vertices_rank_[dimension]] = xr_value;
-            } else if( xr_value < best_value ) {
+                vertices_values_[vertices_rank_[dimension]] = xr_val;
+            } else if( xr_val < best_val ) {
                 // Expansion
-                vector_t xe = centroid + beta_*(xr - centroid);
+                vector_t xe = perturbed_centroid + beta_*(xr - perturbed_centroid);
                 double xe_val = objective(xe);
-                if(xe_vel < xr_value) {
+                if(xe_val < xr_val) {
                     simplex_[vertices_rank_[dimension]] = xe;
-                    vertices_values_[vertices_rank_[dimension]] = xe_value;
+                    vertices_values_[vertices_rank_[dimension]] = xe_val;
                 } else {
                     simplex_[vertices_rank_[dimension]] = xr;
-                    vertices_values_[vertices_rank_[dimension]] = xr_value;
+                    vertices_values_[vertices_rank_[dimension]] = xr_val;
                 }
-            } else if( second_worst_value <= xr_value && xr_value < worst_value ) {
+            } else if( second_worst_val <= xr_val && xr_val < worst_val ) {
                 // Outer Contraction
-                vector_t xoc = c + gamma_ * (xr - c);
+                vector_t xoc = centroid + gamma_ * (xr - centroid);
                 double xoc_val = objective(xoc);
-                if (xoc_val ≤ xr_val) {
+                if (xoc_val <= xr_val) {
                     simplex_[vertices_rank_[dimension]] = xoc;
-                    vertices_values_[vertices_rank_[dimension]] = xoc_value;
+                    vertices_values_[vertices_rank_[dimension]] = xoc_val;
                 }
             } else {
                 // Inner Contraction
-                vector_t xic = c - delta_ * (xr - c);
+                vector_t xic = centroid - gamma_ * (xr - centroid);
                 double xic_val = objective(xic);
                 if( xic_val < best_val) {
                     simplex_[vertices_rank_[dimension]] = xic;
-                    vertices_values_[vertices_rank_[dimension]] = xic_value;
+                    vertices_values_[vertices_rank_[dimension]] = xic_val;
                 }
             }
 
@@ -186,11 +198,26 @@ template <int N> class NelderMead {
             if(variance <= tol_) {
                 done = true;
             }
+
+            std::cout << "---------- STEP " << n_iter_ << " ----------\n";
+            std::cout << "Variance: " << variance << "\n";
+            for(int i = 0; i < simplex_.size(); ++i) {
+                std::cout << "Simplex " << i << ": ";
+                std::cout << simplex_[i] << "\n\n";
+            }
+
+            for(int i = 0; i < simplex_.size(); ++i) {
+                std::cout << "Rank n " << i << ": ";
+                std::cout << vertices_rank_[i];
+                std::cout << ". With value: " << vertices_values_[vertices_rank_[i]] << "\n\n";
+            }
+
+            ++n_iter_;
         }
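The centroid perturbation introduced in this patch is the scheme of Fajfar et al. (2019): instead of reflecting through the plain centroid \bar{x} of the n best vertices, the reflected point is taken about a randomly shifted centroid,

    \bar{x}_p = \bar{x} + 0.1\, u \, \lVert x_{(1)} - x_{(n+1)} \rVert, \qquad x_r = \bar{x}_p + \alpha \,(\bar{x}_p - x_{(n+1)}),

where x_{(1)} and x_{(n+1)} are the best and worst vertices and u is a standard-normal draw normalized to unit length — exactly what random_vect and perturbed_centroid compute in the loop above.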
From ab713031f448b228327165e46688615247ec6bda Mon Sep 17 00:00:00 2001
From: Sh3xe
Date: Mon, 23 Jun 2025 15:22:22 +0200
Subject: [PATCH 10/22] Merged every current optimizers, improved NelderMead
 initial simplex, use std dev instead of variance for stopping criterion

---
 fdaPDE/optimization.h                       |  1 +
 .../binary_tournament_selection.h           |  4 +-
 fdaPDE/src/optimization/gaussian_mutation.h |  7 ++--
 fdaPDE/src/optimization/genetic_optim.h     | 31 ++++++++-------
 fdaPDE/src/optimization/nelder_mead.h       | 38 +++++++++----------
 5 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h
index bd48a3bd..b0f0d756 100644
--- a/fdaPDE/optimization.h
+++ b/fdaPDE/optimization.h
@@ -42,6 +42,7 @@
 #include "src/optimization/bfgs.h"
 #include "src/optimization/lbfgs.h"
 #include "src/optimization/genetic_optim.h"
+#include "src/optimization/nelder_mead.h"
 
 // clang-format on

diff --git a/fdaPDE/src/optimization/binary_tournament_selection.h b/fdaPDE/src/optimization/binary_tournament_selection.h
index dd1f369e..dc698c8c 100644
--- a/fdaPDE/src/optimization/binary_tournament_selection.h
+++ b/fdaPDE/src/optimization/binary_tournament_selection.h
@@ -28,9 +28,7 @@ class BinaryTournamentSelection {
 
 public:
     // constructors
-    BinaryTournamentSelection(int population_size = 10):
-        distribution_(0, population_size - 1)
-    {}
+    BinaryTournamentSelection() = default;
 
     template <typename Opt> void reset_step(Opt& opt) {
         assert(opt.population.size() >= 2);

diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h
index 9c215b90..6b4b01b0 100644
--- a/fdaPDE/src/optimization/gaussian_mutation.h
+++ b/fdaPDE/src/optimization/gaussian_mutation.h
@@ -27,15 +27,16 @@ class GaussianMutation {
     double initial_variance_ = 2.5;
     double variance_ = 2.5;
     double multiplier_ = 0.95;
-    std::normal_distribution<double> normal_dist_;
+    std::normal_distribution<double> normal_dist_ {0.0, 1.0};
 
 public:
     // constructors
+    GaussianMutation() = default;
+
     GaussianMutation(double initial_variance, double multiplier):
         initial_variance_(initial_variance),
         variance_(initial_variance),
-        multiplier_(multiplier),
-        normal_dist_(0.0, 1.0)
+        multiplier_(multiplier)
     {}
 
     template <typename Opt> void reset_step(Opt& opt) {

diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h
index e5bb15d5..4a38061b 100644
--- a/fdaPDE/src/optimization/genetic_optim.h
+++ b/fdaPDE/src/optimization/genetic_optim.h
@@ -45,17 +45,16 @@ template <int N, typename... Args> class GeneticOptim {
     // constructors
     GeneticOptim() = default;
 
-    // GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed = 0)
-    //     requires(sizeof...(Args) != 0):
-    //     seed_(seed),
-    //     max_iter_(max_iter),
-    //     population_size_(population_size),
-    //     population(population_size, vector_t{}),
-    //     population_fitness(population_size, std::numeric_limits<double>::max()),
-    //     tol_(tol), variance_(variance),
-    //     rng(seed) {
-    //     assert(population_size_ >= 0);
-    // }
+    GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed)
+        requires(sizeof...(Args) != 0):
+        max_iter_(max_iter),
+        population_size_(population_size),
+        population(population_size, vector_t{}),
+        population_fitness(population_size, std::numeric_limits<double>::max()),
+        tol_(tol), variance_(variance),
+        rng(seed) {
+        assert(population_size_ >= 0);
+    }
 
     GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed, Args&&... callbacks)
         requires(sizeof...(Args) != 0):
@@ -139,18 +138,18 @@ template <int N, typename... Args> class GeneticOptim {
 
             // Compute the variance and max of the population fitness
             int current_best = 0;
-            double variance = 0.0;
+            double std_dev = 0.0;
             for(int i = 1; i < population_size_; ++i) {
                 double x = population_fitness[i] - fitness_mean;
-                variance += x*x;
+                std_dev += x*x;
                 if(population_fitness[i] < population_fitness[current_best])
                     current_best = i;
             }
-            variance /= static_cast<double>(population_size_-1);
-            // std::cout << "Std dev at " << n_iter_ << " : " << std::sqrt(variance) << std::endl;
+            std_dev /= static_cast<double>(population_size_-1);
+            std_dev = std::sqrt(std_dev);
 
             // Stopping condition
-            stop |= (std::sqrt(variance) <= tol_);
+            stop |= (std_dev <= tol_);
             stop |= execute_stopping_criterion(*this, objective);

diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h
index 6fb47279..82b6cb76 100644
--- a/fdaPDE/src/optimization/nelder_mead.h
+++ b/fdaPDE/src/optimization/nelder_mead.h
@@ -51,17 +51,25 @@ template <int N> class NelderMead {
 
     void init_simplex_(const vector_t &x0) {
         const int dimension = x0.rows();
+        double infnty_norm = x0.cwiseAbs().maxCoeff();
+        double scale_factor = std::min(std::max(infnty_norm, 1.0), 10.0);
+        double last_coeff = scale_factor * (1.0 - (double)std::sqrt(dimension+1))/(double)(dimension);
 
         // Initialises the vectors with cached values
+        simplex_.clear();
+        vertices_rank_.clear();
+        vertices_values_.clear();
         vertices_rank_.resize(dimension+1, 0);
         vertices_values_.resize(dimension+1, std::numeric_limits<double>::max());
         simplex_.resize(dimension+1, x0);
 
         for(int i = 0; i < dimension; ++i) {
-            double tho = std::abs(simplex_[i+1][i]) < tol_ ? 0.00025: 0.05;
-            simplex_[i+1][i] += tho;
+            simplex_[i][i] += scale_factor;
             vertices_rank_[i+1] = i+1;
+            simplex_[dimension][i] += last_coeff;
         }
+
+        simplex_[dimension][dimension-1] += last_coeff;
     }
 
@@ -186,30 +194,20 @@ template <int N> class NelderMead {
             for(double val: vertices_values_) {
                 value_mean += val;
             }
-            value_mean /= (double)simplex_.size();
+            value_mean /= (double)vertices_values_.size();
 
-            double variance = 0.0;
+            double std_dev = 0.0;
             for(double val: vertices_values_) {
                 double x = val - value_mean;
-                variance += x*x;
-            }
-            variance /= (double)(dimension);
-
-            if(variance <= tol_) {
-                done = true;
+                std_dev += x*x;
             }
+            std_dev /= (double)(dimension);
+            std_dev = std::sqrt(std_dev);
 
-            std::cout << "---------- STEP " << n_iter_ << " ----------\n";
-            std::cout << "Variance: " << variance << "\n";
-            for(int i = 0; i < simplex_.size(); ++i) {
-                std::cout << "Simplex " << i << ": ";
-                std::cout << simplex_[i] << "\n\n";
-            }
+            std::cout << "At i=" << n_iter_ << ", std_dev=" << std_dev << std::endl;
 
-            for(int i = 0; i < simplex_.size(); ++i) {
-                std::cout << "Rank n " << i << ": ";
-                std::cout << vertices_rank_[i];
-                std::cout << ". With value: " << vertices_values_[vertices_rank_[i]] << "\n\n";
+            if(std_dev <= tol_) {
+                done = true;
             }
 
             ++n_iter_;
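The new init_simplex_ builds a regular simplex around x0 in the spirit of Wessing (2019). In the code's notation, with scale s = scale_factor = min(max(||x0||_inf, 1), 10) and last_coeff = s(1 - sqrt(n+1))/n, the vertices are

    x_i = x_0 + s\,e_i \quad (i = 1, \dots, n), \qquad x_{n+1} = x_0 + \frac{s\,(1 - \sqrt{n+1})}{n}\,\mathbf{1},

for which every pair of vertices is at the same distance s\sqrt{2}, i.e. the initial simplex is regular and its size adapts to the magnitude of the starting point.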
https://doi.org/10.1007/s11590-018-1284-4 void init_simplex_(const vector_t &x0) { const int dimension = x0.rows(); double infnty_norm = x0.cwiseAbs().maxCoeff(); @@ -105,18 +107,31 @@ template class NelderMead { INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE); bool done = false; + bool require_shrink = false; + n_iter_ = 0; vector_t zero; const int dimension = x0.rows(); fdapde_assert(dimension >= 1); - if constexpr (N == Dynamic) { // inv_hessian approximated with identity matrix + if constexpr (N == Dynamic) { zero = vector_t::Zero(x0.rows()); } else { zero = vector_t::Zero(); } + // Implementing the Nelder-Mead simplex algorithm with adaptive parameters + // DOI: 10.1007/s10589-010-9329-3 + alpha_ = 1.0; + beta_ = 1 + 2.0/(double)dimension; + gamma_ = 0.75- 2.0/(double)dimension; + delta_ = 1.0 - 1.0/(double)dimension; + // Initialise the simplex given x0 init_simplex_(x0); + fdapde_assert(vertices_values_.size() == simplex_.size()); + fdapde_assert(vertices_rank_.size() == simplex_.size()); + fdapde_assert(x0.rows()+1 == simplex_.size()); + // Compute the vertices's values for(int i = 0; i < simplex_.size(); ++i) vertices_values_[i] = objective(simplex_[i]); @@ -128,7 +143,6 @@ template class NelderMead { }); // Centroid calculation - vector_t centroid = zero; // centroid of the [dimension] best vertices vector_t random_vect = zero; for(int i = 0; i < dimension; ++i) { @@ -144,6 +158,7 @@ template class NelderMead { vector_t xr = perturbed_centroid + alpha_*(perturbed_centroid - simplex_[vertices_rank_[dimension]]); // Cache the values used for the if statements + require_shrink = false; const double xr_val = objective(xr); const double best_val = vertices_values_[vertices_rank_[0]]; const double worst_val = vertices_values_[vertices_rank_[dimension]]; @@ -172,6 +187,8 @@ template class NelderMead { if (xoc_val <= xr_val) { simplex_[vertices_rank_[dimension]] = xoc; vertices_values_[vertices_rank_[dimension]] = xoc_val; + } else { + require_shrink = true; } } else { // Inner Contraction @@ -180,13 +197,17 @@ template class NelderMead { if( xic_val < best_val) { simplex_[vertices_rank_[dimension]] = xic; vertices_values_[vertices_rank_[dimension]] = xic_val; + } else { + require_shrink = true; } } // Shrink - const vector_t &best_vertex = simplex_[vertices_rank_[0]]; - for(int i = 1; i < dimension + 1; ++i) { - simplex_[vertices_rank_[i]] = best_vertex + delta_ * (simplex_[vertices_rank_[i]] - best_vertex); + if(require_shrink) { + const vector_t &best_vertex = simplex_[vertices_rank_[0]]; + for(int i = 1; i < dimension + 1; ++i) { + simplex_[vertices_rank_[i]] = best_vertex + delta_ * (simplex_[vertices_rank_[i]] - best_vertex); + } } // Stoping criterion @@ -204,8 +225,6 @@ template class NelderMead { std_dev /= (double)(dimension); std_dev = std::sqrt(std_dev); - std::cout << "At i=" << n_iter_ << ", std_dev=" << std_dev << std::endl; - if(std_dev <= tol_) { done = true; } diff --git a/fdaPDE/src/optimization/wolfe_line_search.h b/fdaPDE/src/optimization/wolfe_line_search.h index cabd96d0..e58a6eb9 100644 --- a/fdaPDE/src/optimization/wolfe_line_search.h +++ b/fdaPDE/src/optimization/wolfe_line_search.h @@ -34,7 +34,7 @@ class WolfeLineSearch { public: // constructors WolfeLineSearch() = default; - WolfeLineSearch(double alpha, double c1, double c2, double c3) : alpha_(alpha), c1_(c1), c2_(c2), c3_(c3) { } + WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { } // bisection method for the weak Wolfe conditions template bool 
pre_update_step(Opt& opt, Obj& obj) { From 0567c88bbc688388a6cf1c351d1619c5a1cf5181 Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Mon, 23 Jun 2025 17:54:35 +0200 Subject: [PATCH 12/22] Finished nelder mead, removed genetic algorithm as it is not ready, improve wolfe line search --- fdaPDE/optimization.h | 7 - .../binary_tournament_selection.h | 62 ------ fdaPDE/src/optimization/callbacks.h | 12 -- fdaPDE/src/optimization/crossover_mutation.h | 40 ---- fdaPDE/src/optimization/gaussian_mutation.h | 60 ------ fdaPDE/src/optimization/genetic_optim.h | 189 ------------------ fdaPDE/src/optimization/nelder_mead.h | 42 +--- fdaPDE/src/optimization/rank_selection.h | 110 ---------- 8 files changed, 3 insertions(+), 519 deletions(-) delete mode 100644 fdaPDE/src/optimization/binary_tournament_selection.h delete mode 100644 fdaPDE/src/optimization/crossover_mutation.h delete mode 100644 fdaPDE/src/optimization/gaussian_mutation.h delete mode 100644 fdaPDE/src/optimization/genetic_optim.h delete mode 100644 fdaPDE/src/optimization/rank_selection.h diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h index b0f0d756..249d2b56 100644 --- a/fdaPDE/optimization.h +++ b/fdaPDE/optimization.h @@ -29,19 +29,12 @@ #include "src/optimization/backtracking_line_search.h" #include "src/optimization/wolfe_line_search.h" -// Genetic algorithm "plug-ins" -#include "src/optimization/binary_tournament_selection.h" -#include "src/optimization/crossover_mutation.h" -#include "src/optimization/gaussian_mutation.h" -#include "src/optimization/rank_selection.h" - // algorithms #include "src/optimization/grid.h" #include "src/optimization/newton.h" #include "src/optimization/gradient_descent.h" #include "src/optimization/bfgs.h" #include "src/optimization/lbfgs.h" -#include "src/optimization/genetic_optim.h" #include "src/optimization/nelder_mead.h" // clang-format on diff --git a/fdaPDE/src/optimization/binary_tournament_selection.h b/fdaPDE/src/optimization/binary_tournament_selection.h deleted file mode 100644 index dc698c8c..00000000 --- a/fdaPDE/src/optimization/binary_tournament_selection.h +++ /dev/null @@ -1,62 +0,0 @@ - -// This file is part of fdaPDE, a C++ library for physics-informed -// spatial and functional data analysis. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
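For context on the selection operator deleted below: binary tournament selection draws two individuals uniformly at random and keeps the one with the lower objective value, once per population slot, so better individuals gradually crowd out worse ones without any explicit sorting. A minimal self-contained sketch of the idea (function and variable names here are illustrative, not part of the patch):

#include <random>
#include <vector>

// Binary tournament selection: lower fitness wins, matching the minimization
// convention used throughout this module. Returns the selected indices.
std::vector<int> binary_tournament(const std::vector<double>& fitness, std::mt19937& rng) {
    std::uniform_int_distribution<int> pick(0, static_cast<int>(fitness.size()) - 1);
    std::vector<int> winners;
    winners.reserve(fitness.size());
    for (std::size_t i = 0; i < fitness.size(); ++i) {
        int a = pick(rng);
        int b = pick(rng);
        winners.push_back(fitness[a] <= fitness[b] ? a : b);
    }
    return winners;
}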
- -#ifndef __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__ -#define __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__ - -#include "header_check.h" - -namespace fdapde { - -class BinaryTournamentSelection { -private: - std::uniform_int_distribution distribution_; - -public: - // constructors - BinaryTournamentSelection() = default; - - template void reset_step(Opt& opt) { - assert(opt.population.size() >= 2); - distribution_ = std::uniform_int_distribution{0,static_cast(opt.population.size())-1}; - } - - template bool pre_update_step(Opt& opt, Obj& obj) { - assert(distribution_.max() == opt.population.size() - 1); - - // Perform binary tournament selection - for(int i = 0; i < opt.population.size(); i += 1) { - int id_first = distribution_(opt.rng); - int id_second = distribution_(opt.rng); - if(id_first == id_second) - continue; - - double fit_first = opt.population_fitness[id_first]; - double fit_second = opt.population_fitness[id_second]; - if(fit_first <= fit_second) { - opt.population[id_second] = opt.population[id_first]; - } else { - opt.population[id_first] = opt.population[id_second]; - } - } - return false; - } -}; - -} // namespace fdapde - -#endif // __FDAPDE_BINARY_TOURNAMENT_SELECTION_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/callbacks.h b/fdaPDE/src/optimization/callbacks.h index 280a1710..9f67f85f 100644 --- a/fdaPDE/src/optimization/callbacks.h +++ b/fdaPDE/src/optimization/callbacks.h @@ -59,18 +59,6 @@ template bool execute_stopping_criterion(Opt& optim return b; } -template -void execute_reset_step(Opt& optimizer, std::tuple& callbacks) { - auto exec_callback = [&](auto&& callback) { - if constexpr (requires(std::decay_t c, Opt opt) { - { c.reset_step(opt) } -> std::same_as; - }) { - callback.reset_step(optimizer); - } - }; - std::apply([&](auto&&... callback) { (exec_callback(callback), ...); }, callbacks); -} - } // namespace fdapde #endif // __FDAPDE_OPTIMIZATION_CALLBACKS_H__ diff --git a/fdaPDE/src/optimization/crossover_mutation.h b/fdaPDE/src/optimization/crossover_mutation.h deleted file mode 100644 index 1e611151..00000000 --- a/fdaPDE/src/optimization/crossover_mutation.h +++ /dev/null @@ -1,40 +0,0 @@ - -// This file is part of fdaPDE, a C++ library for physics-informed -// spatial and functional data analysis. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
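The execute_reset_step helper removed above also documents the callback protocol used in this module: callbacks are plain structs whose hooks (reset_step, pre_update_step, post_update_step) are detected at compile time with a requires-expression, and a hook returning true requests a forced stop. A hypothetical conforming callback, sketched for illustration only (PrintStep is an assumed name, not part of the patch):

#include <iostream>

// Prints the current step size before each update; never forces a stop.
struct PrintStep {
    template <typename Opt, typename Obj>
    bool pre_update_step(Opt& opt, Obj& /*obj*/) {
        std::cout << "h = " << opt.h << "\n";   // opt.h is the step exposed by the gradient-based optimizers
        return false;                           // returning true would request a forced stop
    }
};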
- -#ifndef __FDAPDE_CROSSOVER_MUTATION_H__ -#define __FDAPDE_CROSSOVER_MUTATION_H__ - -#include "header_check.h" - -namespace fdapde { - -class CrossoverMutation { -private: - -public: - // constructors - CrossoverMutation() = default; - - template bool pre_update_step(Opt& opt, Obj& obj) { - - return false; - } -}; - -} // namespace fdapde - -#endif // __FDAPDE_CROSSOVER_MUTATION_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h deleted file mode 100644 index 6b4b01b0..00000000 --- a/fdaPDE/src/optimization/gaussian_mutation.h +++ /dev/null @@ -1,60 +0,0 @@ - -// This file is part of fdaPDE, a C++ library for physics-informed -// spatial and functional data analysis. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -#ifndef __FDAPDE_GAUSSIAN_MUTATION_H__ -#define __FDAPDE_GAUSSIAN_MUTATION_H__ - -#include "header_check.h" - -namespace fdapde { - -class GaussianMutation { -private: - double initial_variance_ = 2.5; - double variance_ = 2.5; - double multiplier_ = 0.95; - std::normal_distribution normal_dist_ {0.0, 1.0}; - -public: - // constructors - GaussianMutation() = default; - - GaussianMutation(double initial_variance, double multiplier): - initial_variance_(initial_variance), - variance_(initial_variance), - multiplier_(multiplier) - {} - - template void reset_step(Opt& opt) { - variance_ = initial_variance_; - } - - template bool post_update_step(Opt& opt, Obj& obj) { - // Perform binary tournament selection - for(auto &vec: opt.population) { - for(int i = 0; i < vec.rows(); ++i) - vec[i] += normal_dist_(opt.rng) * variance_; - } - - variance_ *= multiplier_; - return false; - } -}; - -} // namespace fdapde - -#endif // __FDAPDE_GAUSSIAN_MUTATION_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h deleted file mode 100644 index 4a38061b..00000000 --- a/fdaPDE/src/optimization/genetic_optim.h +++ /dev/null @@ -1,189 +0,0 @@ -// This file is part of fdaPDE, a C++ library for physics-informed -// spatial and functional data analysis. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
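Before the genetic driver itself is deleted: the GaussianMutation removed above perturbs every coordinate by a unit normal scaled by variance_ (so variance_ effectively acts as a standard deviation) and damps that scale geometrically through multiplier_, shifting the search from exploration to refinement across generations. A short numeric sketch of the schedule, using the defaults shown above:

#include <cmath>

// Geometric decay of the mutation scale: sigma_k = sigma_0 * multiplier^k.
double sigma0 = 2.5;        // initial_variance_
double multiplier = 0.95;   // multiplier_
double sigma50 = sigma0 * std::pow(multiplier, 50);   // ~0.19 after 50 generations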
- -#ifndef __FDAPDE_GENETIC_OPTIM_H__ -#define __FDAPDE_GENETIC_OPTIM_H__ - -#include "header_check.h" - -namespace fdapde { - -template class GeneticOptim { -private: - using vector_t = std::conditional_t, Eigen::Matrix>; - using matrix_t = std::conditional_t, Eigen::Matrix>; - - std::tuple callbacks_; - vector_t optimum_; - double value_; // objective value at optimum - int max_iter_; // maximum number of iterations before forced stop - int n_iter_ = 0; // current iteration number - double tol_; // tolerance on error before forced stop - double variance_; // update step - int population_size_; // The size of any given generation - unsigned seed_; // Seed for the RNG - -public: - std::vector population; - std::vector population_fitness; - std::mt19937 rng; - -public: - // constructors - GeneticOptim() = default; - - GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed) - requires(sizeof...(Args) != 0): - max_iter_(max_iter), - population_size_(population_size), - population(population_size, vector_t{}), - population_fitness(population_size, std::numeric_limits::max()), - tol_(tol), variance_(variance), - rng(seed) { - assert(population_size_ >= 0); - } - - GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed, Args&&... callbacks) - requires(sizeof...(Args) != 0): - callbacks_(std::make_tuple(std::forward(callbacks)...)), - max_iter_(max_iter), - population_size_(population_size), - population(population_size, vector_t{}), - population_fitness(population_size, std::numeric_limits::max()), - tol_(tol), variance_(variance), - rng(seed) { - assert(population_size_ >= 0); - } - - // copy semantic - GeneticOptim(const GeneticOptim& other) : - callbacks_(other.callbacks_), - population_size_(other.population_size_), - population_fitness(other.population_fitness), - max_iter_(other.max_iter_), - seed_(other.seed_), - tol_(other.tol_), - population(other.population), - variance_(other.variance_){ - } - - GeneticOptim& operator=(const GeneticOptim& other) { - max_iter_ = other.max_iter_; - tol_ = other.tol_; - population_fitness = other.population_fitness; - variance_ = other.variance_; - population_size_ = other.population_size_; - callbacks_ = other.callbacks_; - seed_ = other.seed_; - population = other.population; - return *this; - } - - template - requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...) - vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) { - fdapde_static_assert( - std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, - INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE); - - // whether or not the callbacks's stoping criteria are met - bool stop = false; - double fitness_mean = 0.0; - value_ = std::numeric_limits::max(); - n_iter_ = 0; - - // Initialize the state - for(int i = 0; i < population_size_; ++i) { - population[i] = x0; - } - - // Reset the state of all callbacks and sync their state with the - // parameters in GeneticOptim - execute_reset_step(*this, callbacks_); - - // In the case of this algorithm, post_update... 
is the mutation process - // so we apply it before the main loop - stop |= execute_post_update_step(*this, objective, callbacks_); - - while (n_iter_ < max_iter_ && !stop) { - // Compute the fitness for selection - for(int i = 0; i < population_size_; ++i) { - population_fitness[i] = objective(population[i]); - } - - // pre_update_step should implement a selection algorithm - stop |= execute_pre_update_step(*this, objective, callbacks_); - - // post_update_step should implement a mutation algorithm - stop |= execute_post_update_step(*this, objective, callbacks_); - - // Compute the mean of the population fitness - fitness_mean = 0.0; - for(int i = 0; i < population_size_; ++i) { - fitness_mean += population_fitness[i]; - } - fitness_mean /= static_cast(population_size_); - - // Compute the variance and max of the population fitness - int current_best = 0; - double std_dev = 0.0; - for(int i = 1; i < population_size_; ++i) { - double x = population_fitness[i] - fitness_mean; - std_dev += x*x; - if(population_fitness[i] < population_fitness[current_best]) - current_best = i; - } - std_dev /= static_cast(population_size_-1); - std_dev = std::sqrt(std_dev); - - // Stoping condition - stop |= (std_dev <= tol_); - stop |= execute_stopping_criterion(*this, objective); - - // Update - ++n_iter_; - value_ = population_fitness[current_best]; - optimum_ = population[current_best]; - } - - return optimum_; - } - - /** - * @brief Returns the argmin of the last call to [optimize] - * - * @return vector_t - */ - vector_t optimum() const { return optimum_; } - - /** - * @brief Returns the minimum of the last call to [optimize] - * - * @return double - */ - double value() const { return value_; } - - /** - * @brief Returns the number of iterations performed by the method on the last call to [optimize] - * - * @return int - */ - int n_iter() const { return n_iter_; } -}; - -} // namespace fdapde - -#endif // __FDAPDE_GENETIC_OPTIM_H__ diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h index aad5d725..fc9c4e38 100644 --- a/fdaPDE/src/optimization/nelder_mead.h +++ b/fdaPDE/src/optimization/nelder_mead.h @@ -21,11 +21,7 @@ namespace fdapde { -/** - * @brief Implementation of a perturbed version of the Nelder–Mead algorithm as analysed in [Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for high-dimensional function optimization. Optim Lett 13, 1011–1025 (2019)] - * - * @tparam N dimension of the problem - */ +// Implementation of Nelder-Mead algorithm for gradient-free unconstrained nonlinear optimization template class NelderMead { private: using vector_t = std::conditional_t, Eigen::Matrix>; @@ -79,26 +75,11 @@ template class NelderMead { // constructors NelderMead() = default; - /** - * @brief Construct a new NelderMead instance - * - * @param max_iter maximum number of iterations performed - * @param memory_size size of the memory used to approximate the inverse hessian matrix in the LBFGH alg. - * @param tol tolerance used as the stoping criterion (|\nabla f_k|_2 < tol) - */ NelderMead(int max_iter, double tol): max_iter_(max_iter), tol_(tol){ } - /** - * @brief Minimizes a function using the L-BFGS method - * - * @tparam ObjectiveT Type of the function to be minimized - * @param objective function to be minimized - * @param x0 initial point - * @param func - */ template requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...) 
vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) { @@ -118,8 +99,8 @@ template class NelderMead { zero = vector_t::Zero(); } - // Implementing the Nelder-Mead simplex algorithm with adaptive parameters - // DOI: 10.1007/s10589-010-9329-3 + // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters. + // Comput Optim Appl 51, 259–277 (2012). https://doi.org/10.1007/s10589-010-9329-3 alpha_ = 1.0; beta_ = 1 + 2.0/(double)dimension; gamma_ = 0.75- 2.0/(double)dimension; @@ -237,25 +218,8 @@ template class NelderMead { return optimum_; } - /** - * @brief Returns the argmin of the last call to [optimize] - * - * @return vector_t - */ vector_t optimum() const { return optimum_; } - - /** - * @brief Returns the minimum of the last call to [optimize] - * - * @return double - */ double value() const { return value_; } - - /** - * @brief Returns the number of iterations performed by the method on the last call to [optimize] - * - * @return int - */ int n_iter() const { return n_iter_; } }; diff --git a/fdaPDE/src/optimization/rank_selection.h b/fdaPDE/src/optimization/rank_selection.h deleted file mode 100644 index 645d60d7..00000000 --- a/fdaPDE/src/optimization/rank_selection.h +++ /dev/null @@ -1,110 +0,0 @@ - -// This file is part of fdaPDE, a C++ library for physics-informed -// spatial and functional data analysis. -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
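For the rank-selection operator deleted next: under linear ranking, rank i (with 1 the best) is drawn with probability pi_i = (amax - (amax - amin)*(i-1)/(m-1))/m, so with m = 5, amax = 1.2 and amin = 0.8 the weights are 0.24, 0.22, 0.20, 0.18, 0.16 and sum to 1 (the committed loop indexes ranks from 0, so its cumulative weights actually end at 1.1 rather than 1). Sampling then inverts the cumulative distribution; the hand-rolled binary search below is equivalent to this standard-library sketch (not part of the patch):

#include <algorithm>
#include <vector>

// CDF inversion: first index whose cumulative probability reaches the sample u,
// clamped so that a u above cdf.back() (rounding) stays in range.
int index_of_sample(const std::vector<double>& cdf, double u) {
    auto it = std::lower_bound(cdf.begin(), cdf.end(), u);
    int i = static_cast<int>(it - cdf.begin());
    return std::min(i, static_cast<int>(cdf.size()) - 1);
}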
- -#ifndef __FDAPDE_RANK_SELECTION_H__ -#define __FDAPDE_RANK_SELECTION_H__ - -#include "header_check.h" - -namespace fdapde { - -template -class RankSelection { -private: - std::vector population_order_; - std::uniform_real_distribution<> distribution_{0.0, 1.0}; - std::vector cdf_; - std::vector new_population_; - - /** - * @brief Given a random number between 0 and 1, returns the index of selected rank with the probability of rank i \pi_i = (amax - (amax-amin)*(i-1)/(m-1))/m - * - * @param sample random uniform sample between 0 and 1 - * @return int the rank associated with the sample - */ - int index_of_sample(double sample) { - int upper_bound = cdf_.size() - 1; - int lower_bound = 0; - - // Handle edge cases - if (sample <= cdf_[0]) { - return 0; - } - if (sample > cdf_[upper_bound]) { - return upper_bound; - } - - // Binary search to find the appropriate interval - while (lower_bound < upper_bound) { - int middle = lower_bound + (upper_bound - lower_bound) / 2; - if (cdf_[middle] < sample) { - lower_bound = middle + 1; - } else { - upper_bound = middle; - } - } - - return upper_bound; - } - -public: - // constructors - RankSelection() = default; - - template void reset_step(Opt& opt) { - population_order_.clear(); - cdf_.clear(); - new_population_.clear(); - - population_order_.reserve(opt.population.size()); - new_population_.reserve(opt.population.size()); - cdf_.reserve(opt.population.size()); - - constexpr double amax = 1.2; - constexpr double amin = 2.0 - amax; - double probability_sum = 0.0; - for(int i = 0; i < opt.population.size(); ++i) { - double pi_i = (amax - (double)(amax - amin)*(i-1)/(double)(opt.population.size()-1))/(double)(opt.population.size()); - probability_sum += pi_i; - cdf_.push_back(probability_sum); - population_order_.push_back(i); - } - } - - template bool pre_update_step(Opt& opt, Obj& obj) { - std::sort( - population_order_.begin(), - population_order_.end(), - [&](int a, int b) { - return opt.population_fitness[a] < opt.population_fitness[b]; - } - ); - - new_population_.clear(); - for(int i = 0; i < opt.population.size(); ++i) { - int selected = index_of_sample(distribution_(opt.rng)); - new_population_.push_back(opt.population[population_order_[selected]]); - } - opt.population = new_population_; - - return false; - } -}; - -} // namespace fdapde - -#endif // __FDAPDE_RANK_SELECTION_H__ \ No newline at end of file From 382ccf2418df7d99b5bd5b7055b9f78ff4dde7ea Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Mon, 23 Jun 2025 17:56:54 +0200 Subject: [PATCH 13/22] fixed wofle line search --- fdaPDE/src/optimization/wolfe_line_search.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fdaPDE/src/optimization/wolfe_line_search.h b/fdaPDE/src/optimization/wolfe_line_search.h index e58a6eb9..0bcb4469 100644 --- a/fdaPDE/src/optimization/wolfe_line_search.h +++ b/fdaPDE/src/optimization/wolfe_line_search.h @@ -101,6 +101,7 @@ class WolfeLineSearch { alpha_curr = std::isinf(alpha_max) ? 
2*alpha_curr : (alpha_max + alpha_min)/2; } + opt.h = alpha_curr; return false; } }; From 1ab82a786af9aad6a30b8f26d7bc0494d141f6e8 Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Wed, 25 Jun 2025 16:52:08 +0200 Subject: [PATCH 14/22] fixed GD header guard, added CG meethod --- fdaPDE/optimization.h | 1 + fdaPDE/src/optimization/conjugate_gradient.h | 124 +++++++++++++++++++ fdaPDE/src/optimization/gradient_descent.h | 6 +- 3 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 fdaPDE/src/optimization/conjugate_gradient.h diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h index 249d2b56..2134929a 100644 --- a/fdaPDE/optimization.h +++ b/fdaPDE/optimization.h @@ -33,6 +33,7 @@ #include "src/optimization/grid.h" #include "src/optimization/newton.h" #include "src/optimization/gradient_descent.h" +#include "src/optimization/conjugate_gradient.h" #include "src/optimization/bfgs.h" #include "src/optimization/lbfgs.h" #include "src/optimization/nelder_mead.h" diff --git a/fdaPDE/src/optimization/conjugate_gradient.h b/fdaPDE/src/optimization/conjugate_gradient.h new file mode 100644 index 00000000..acb43cc0 --- /dev/null +++ b/fdaPDE/src/optimization/conjugate_gradient.h @@ -0,0 +1,124 @@ +// This file is part of fdaPDE, a C++ library for physics-informed +// spatial and functional data analysis. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#ifndef __FDAPDE_CONJUGATE_GRADIENT_H__ +#define __FDAPDE_CONJUGATE_GRADIENT_H__ + +#include "header_check.h" + +namespace fdapde { + +// implementation of Conjugate Gradient method for unconstrained nonlinear optimization +template class ConjugateGradient { + private: + using vector_t = + std::conditional_t, Eigen::Matrix>; + using matrix_t = + std::conditional_t, Eigen::Matrix>; + + std::tuple callbacks_ {}; + vector_t optimum_; + double value_; // objective value at optimum + int max_iter_; // maximum number of iterations before forced stop + double tol_; // tolerance on error before forced stop + double step_; // update step + int n_iter_ = 0; // current iteration number + bool use_polak_ribiere_ = true; // If set to false, use Fletcher-Reave, else use Polak-Ribière to calculate beta + + public: + vector_t x_old, x_new, update, dir, grad_old, grad_new; + double h; + + // constructor + ConjugateGradient() = default; + + ConjugateGradient(int max_iter, double tol, double step, bool use_polak_ribiere = true) + requires(sizeof...(Args) != 0) + : max_iter_(max_iter), tol_(tol), step_(step), use_polak_ribiere_(use_polak_ribiere) { } + + ConjugateGradient(int max_iter, double tol, double step, bool use_polak_ribiere, Args&&... 
callbacks) : + callbacks_(std::make_tuple(std::forward(callbacks)...)), max_iter_(max_iter), tol_(tol), step_(step), use_polak_ribiere_(use_polak_ribiere) { } + + // copy semantic + ConjugateGradient(const ConjugateGradient& other) : + callbacks_(other.callbacks_), max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_), use_polak_ribiere_(other.use_polak_ribiere_) { } + + ConjugateGradient& operator=(const ConjugateGradient& other) { + max_iter_ = other.max_iter_; + tol_ = other.tol_; + step_ = other.step_; + callbacks_ = other.callbacks_; + use_polak_ribiere_ = other.use_polak_ribiere_; + return *this; + } + + template + requires(sizeof...(Functor) < 2) && ((requires(Functor f, double value) { f(value); }) && ...) + vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Functor&&... func) { + fdapde_static_assert( + std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, + INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE + ); + + bool stop = false; // asserted true in case of forced stop + double error = std::numeric_limits::max(); + double beta = 0.0; + h = step_; + n_iter_ = 0; + x_old = x0, x_new = x0; + auto grad = objective.gradient(); + grad_old = grad(x_old); + dir = -grad_old; + + while (n_iter_ < max_iter_ && error > tol_ && !stop) { + update = dir; + stop |= execute_pre_update_step(*this, objective, callbacks_); + + // update along descent direction + x_new = x_old + h * update; + grad_new = grad(x_new); + if constexpr (sizeof...(Functor) == 1) { (func(objective(x_old)), ...); } + if( use_polak_ribiere_ ) + beta = grad_new.dot(grad_new - grad_old) / grad_old.dot(grad_old); // Polak Ribière + else + beta = grad_new.dot(grad_new) / grad_old.dot(grad_old); // Fletcher-Reeves + + dir = -grad_new + beta * dir; + + // prepare next iteration + error = grad_new.norm(); + stop |= + (execute_post_update_step(*this, objective, callbacks_) || execute_stopping_criterion(*this, objective)); + x_old = x_new; + grad_old = grad_new; + + n_iter_++; + } + optimum_ = x_old; + value_ = objective(optimum_); + if constexpr (sizeof...(Functor) == 1) { (func(value_), ...); } + return optimum_; + } + + // getters + vector_t optimum() const { return optimum_; } + double value() const { return value_; } + int n_iter() const { return n_iter_; } +}; + +} // namespace fdapde + +#endif // __FDAPDE_CONJUGATE_GRADIENT_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/gradient_descent.h b/fdaPDE/src/optimization/gradient_descent.h index 4216e957..db08b634 100644 --- a/fdaPDE/src/optimization/gradient_descent.h +++ b/fdaPDE/src/optimization/gradient_descent.h @@ -14,8 +14,8 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . 
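The two beta rules in the conjugate gradient update above are the classical ones: Fletcher-Reeves takes beta = g_new . g_new / g_old . g_old, while Polak-Ribiere takes beta = g_new . (g_new - g_old) / g_old . g_old and typically recovers faster on ill-conditioned problems. A standalone sketch of both; the max(0, .) clamp in the second (the common "PR+" safeguard that preserves a descent direction) is an illustrative addition and is not in the committed code:

#include <Eigen/Dense>
#include <algorithm>

double beta_fletcher_reeves(const Eigen::VectorXd& g_new, const Eigen::VectorXd& g_old) {
    return g_new.dot(g_new) / g_old.dot(g_old);
}

double beta_polak_ribiere_plus(const Eigen::VectorXd& g_new, const Eigen::VectorXd& g_old) {
    // clamping at zero (PR+) falls back to plain steepest descent when beta < 0
    return std::max(0.0, g_new.dot(g_new - g_old) / g_old.dot(g_old));
}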
-#ifndef __FDAPDE_GRADIENT_DESCENT__ -#define __FDAPDE_GRADIENT_DESCENT__ +#ifndef __FDAPDE_GRADIENT_DESCENT_H__ +#define __FDAPDE_GRADIENT_DESCENT_H__ #include "header_check.h" @@ -98,4 +98,4 @@ template class GradientDescent { } // namespace fdapde -#endif // __FDAPDE_GRADIENT_DESCENT__ +#endif // __FDAPDE_GRADIENT_DESCENT_H__ From 1bc2fedda108125703b35335b53e560fe2249ba5 Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Thu, 26 Jun 2025 15:49:48 +0200 Subject: [PATCH 15/22] Added crossover mutation, fixed geneticOptim constructor --- fdaPDE/src/optimization/crossover_mutation.h | 16 ++++++++++++- fdaPDE/src/optimization/gaussian_mutation.h | 7 +++--- fdaPDE/src/optimization/genetic_optim.h | 24 ++++++++++---------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/fdaPDE/src/optimization/crossover_mutation.h b/fdaPDE/src/optimization/crossover_mutation.h index 1e611151..4c08f886 100644 --- a/fdaPDE/src/optimization/crossover_mutation.h +++ b/fdaPDE/src/optimization/crossover_mutation.h @@ -24,13 +24,27 @@ namespace fdapde { class CrossoverMutation { private: + std::uniform_int_distribution distribution_{0,10}; public: // constructors CrossoverMutation() = default; + template void reset_step(Opt& opt) { + distribution_ = std::uniform_int_distribution(0, static_cast(opt.population[0].rows())); + } + template bool pre_update_step(Opt& opt, Obj& obj) { - + for(int i = 0; i < opt.population.size() - 1; i += 2) { + auto &parent_1 = opt.population[i]; + auto &parent_2 = opt.population[i+1]; + int k = distribution_(opt.rng); + for(int j = k; j < opt.population[0].rows(); ++j) { + double coeff = (parent_1[j] + parent_2[j])/2.0; + parent_1[j] = coeff; + parent_2[j] = coeff; + } + } return false; } }; diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h index 9c215b90..0e120aee 100644 --- a/fdaPDE/src/optimization/gaussian_mutation.h +++ b/fdaPDE/src/optimization/gaussian_mutation.h @@ -27,15 +27,14 @@ class GaussianMutation { double initial_variance_ = 2.5; double variance_ = 2.5; double multiplier_ = 0.95; - std::normal_distribution normal_dist_; + std::normal_distribution normal_dist_{0.0,1.0}; public: // constructors - GaussianMutation(double initial_variance, double multiplier): + GaussianMutation(double initial_variance = 2.5, double multiplier = 0.95): initial_variance_(initial_variance), variance_(initial_variance), - multiplier_(multiplier), - normal_dist_(0.0, 1.0) + multiplier_(multiplier) {} template void reset_step(Opt& opt) { diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h index e5bb15d5..2fefd3d9 100644 --- a/fdaPDE/src/optimization/genetic_optim.h +++ b/fdaPDE/src/optimization/genetic_optim.h @@ -45,22 +45,23 @@ template class GeneticOptim { // constructors GeneticOptim() = default; - // GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed = 0) - // requires(sizeof...(Args) != 0): - // seed_(seed), - // max_iter_(max_iter), - // population_size_(population_size), - // population(population_size, vector_t{}), - // population_fitness(population_size, std::numeric_limits::max()), - // tol_(tol), variance_(variance), - // rng(seed) { - // assert(population_size_ >= 0); - // } + GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed = 0) + requires(sizeof...(Args) != 0): + seed_(seed), + max_iter_(max_iter), + population_size_(population_size), + population(population_size, vector_t{}), + 
population_fitness(population_size, std::numeric_limits::max()), + tol_(tol), variance_(variance), + rng(seed) { + assert(population_size_ >= 0); + } GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed, Args&&... callbacks) requires(sizeof...(Args) != 0): callbacks_(std::make_tuple(std::forward(callbacks)...)), max_iter_(max_iter), + seed_(seed), population_size_(population_size), population(population_size, vector_t{}), population_fitness(population_size, std::numeric_limits::max()), @@ -148,7 +149,6 @@ template class GeneticOptim { current_best = i; } variance /= static_cast(population_size_-1); - // std::cout << "Std dev at " << n_iter_ << " : " << std::sqrt(variance) << std::endl; // Stoping condition stop |= (std::sqrt(variance) <= tol_); From 4ac3d4c7001aee29c1b5974e78065270e2f4e41b Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Fri, 4 Jul 2025 17:41:26 +0200 Subject: [PATCH 16/22] removed perturbized centroid in low dimensions because of performance --- fdaPDE/src/optimization/nelder_mead.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h index fc9c4e38..0b6dd209 100644 --- a/fdaPDE/src/optimization/nelder_mead.h +++ b/fdaPDE/src/optimization/nelder_mead.h @@ -128,14 +128,21 @@ template class NelderMead { vector_t random_vect = zero; for(int i = 0; i < dimension; ++i) { centroid += simplex_[vertices_rank_[i]]; - random_vect[i] = normal_dist_(rng_); + if(dimension >= 10) { + random_vect[i] = normal_dist_(rng_); + } } centroid /= (double)(dimension); - random_vect /= random_vect.norm(); + if(dimension >= 10) { + random_vect /= random_vect.norm(); + } // Perturbation of the centroid to enhance performance for large dimensions // Optim Lett 13, 1011–1025 (2019). 
https://doi.org/10.1007/s11590-018-1306-2 - vector_t perturbed_centroid = centroid + 0.1 * random_vect * (simplex_[vertices_rank_[0]] - simplex_[vertices_rank_[dimension]]).norm(); + vector_t perturbed_centroid = centroid; + if(dimension >= 10) { + perturbed_centroid += 0.1 * random_vect * (simplex_[vertices_rank_[0]] - simplex_[vertices_rank_[dimension]]).norm(); + } vector_t xr = perturbed_centroid + alpha_*(perturbed_centroid - simplex_[vertices_rank_[dimension]]); // Cache the values used for the if statements @@ -225,4 +232,4 @@ template class NelderMead { } // namespace fdapde -#endif // __FDAPDE_NELDER_MEAD_H__ \ No newline at end of file +#endif // __FDAPDE_NELDER_MEAD_H__ From 9847b99e091b534635b5c946d0a89f2e2c18f999 Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Mon, 7 Jul 2025 18:15:04 +0200 Subject: [PATCH 17/22] fixed genetic opts --- fdaPDE/src/optimization/gaussian_mutation.h | 1 + fdaPDE/src/optimization/genetic_optim.h | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fdaPDE/src/optimization/gaussian_mutation.h b/fdaPDE/src/optimization/gaussian_mutation.h index 0e120aee..b16c792a 100644 --- a/fdaPDE/src/optimization/gaussian_mutation.h +++ b/fdaPDE/src/optimization/gaussian_mutation.h @@ -38,6 +38,7 @@ class GaussianMutation { {} template void reset_step(Opt& opt) { + initial_variance_ = opt.initial_variance; variance_ = initial_variance_; } diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h index 2fefd3d9..ae8364af 100644 --- a/fdaPDE/src/optimization/genetic_optim.h +++ b/fdaPDE/src/optimization/genetic_optim.h @@ -32,32 +32,32 @@ template class GeneticOptim { int max_iter_; // maximum number of iterations before forced stop int n_iter_ = 0; // current iteration number double tol_; // tolerance on error before forced stop - double variance_; // update step int population_size_; // The size of any given generation unsigned seed_; // Seed for the RNG - + public: std::vector population; std::vector population_fitness; std::mt19937 rng; + double initial_variance; // update step public: // constructors GeneticOptim() = default; - GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed = 0) + GeneticOptim(int max_iter, double tol, double initial_variance, int population_size, unsigned seed = 0) requires(sizeof...(Args) != 0): seed_(seed), max_iter_(max_iter), population_size_(population_size), population(population_size, vector_t{}), population_fitness(population_size, std::numeric_limits::max()), - tol_(tol), variance_(variance), + tol_(tol), initial_variance(initial_variance), rng(seed) { assert(population_size_ >= 0); } - GeneticOptim(int max_iter, double tol, double variance, int population_size, unsigned seed, Args&&... callbacks) + GeneticOptim(int max_iter, double tol, double initial_variance, int population_size, unsigned seed, Args&&... 
callbacks) requires(sizeof...(Args) != 0): callbacks_(std::make_tuple(std::forward(callbacks)...)), max_iter_(max_iter), @@ -65,7 +65,7 @@ template class GeneticOptim { population_size_(population_size), population(population_size, vector_t{}), population_fitness(population_size, std::numeric_limits::max()), - tol_(tol), variance_(variance), + tol_(tol), initial_variance(initial_variance), rng(seed) { assert(population_size_ >= 0); } @@ -79,14 +79,14 @@ template class GeneticOptim { seed_(other.seed_), tol_(other.tol_), population(other.population), - variance_(other.variance_){ + initial_variance(other.initial_variance){ } GeneticOptim& operator=(const GeneticOptim& other) { max_iter_ = other.max_iter_; tol_ = other.tol_; population_fitness = other.population_fitness; - variance_ = other.variance_; + initial_variance = other.initial_variance; population_size_ = other.population_size_; callbacks_ = other.callbacks_; seed_ = other.seed_; From bd7dac734f13010a1742f0bcfbfcb951b40632f6 Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Mon, 7 Jul 2025 18:16:11 +0200 Subject: [PATCH 18/22] fixed nelder-mead coefficients --- fdaPDE/src/optimization/nelder_mead.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h index 0b6dd209..11814693 100644 --- a/fdaPDE/src/optimization/nelder_mead.h +++ b/fdaPDE/src/optimization/nelder_mead.h @@ -75,9 +75,10 @@ template class NelderMead { // constructors NelderMead() = default; - NelderMead(int max_iter, double tol): + NelderMead(int max_iter, double tol, unsigned int seed = 0u): max_iter_(max_iter), - tol_(tol){ + tol_(tol), + rng_(seed){ } template @@ -103,7 +104,7 @@ template class NelderMead { // Comput Optim Appl 51, 259–277 (2012). 
https://doi.org/10.1007/s10589-010-9329-3 alpha_ = 1.0; beta_ = 1 + 2.0/(double)dimension; - gamma_ = 0.75- 2.0/(double)dimension; + gamma_ = 0.75- 1.0/(double)(2*dimension); delta_ = 1.0 - 1.0/(double)dimension; // Initialise the simplex given x0 From f25d25503e7ae500df384cd4667c880c9597eb4e Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Fri, 25 Jul 2025 13:57:16 +0200 Subject: [PATCH 19/22] replaced std::vector with matrix_t --- .../src/optimization/genetic_mutation_ops.h | 16 ++++---- fdaPDE/src/optimization/genetic_optim.h | 35 ++++++++---------- .../src/optimization/genetic_selection_ops.h | 37 +++++++++++-------- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/fdaPDE/src/optimization/genetic_mutation_ops.h b/fdaPDE/src/optimization/genetic_mutation_ops.h index e8786d3c..8f38af4c 100644 --- a/fdaPDE/src/optimization/genetic_mutation_ops.h +++ b/fdaPDE/src/optimization/genetic_mutation_ops.h @@ -44,9 +44,9 @@ class GaussianMutation { template bool mutate_hook(Opt& opt) { // Perform binary tournament selection - for(auto &vec: opt.population) { - for(int i = 0; i < vec.rows(); ++i) - vec[i] += normal_dist_(opt.rng) * variance_; + for(int j = 0; j < opt.population.cols(); ++j) { + for(int i = 0; i < opt.population.rows(); ++i) + opt.population(i,j) += normal_dist_(opt.rng) * variance_; } @@ -64,16 +64,16 @@ class CrossoverMutation { CrossoverMutation() = default; template bool sync_hook(Opt& opt) { - distribution_ = std::uniform_int_distribution(0, static_cast(opt.population[0].rows())); + distribution_ = std::uniform_int_distribution(0, static_cast(opt.population.rows())); return false; } template bool mutate_hook(Opt& opt) { - for(int i = 0; i < opt.population.size() - 1; i += 2) { - auto &parent_1 = opt.population[i]; - auto &parent_2 = opt.population[i+1]; + for(int i = 0; i < opt.population.cols() - 1; i += 2) { + auto &parent_1 = opt.population.col(i); + auto &parent_2 = opt.population.col(i+1); int k = distribution_(opt.rng); - for(int j = k; j < opt.population[0].rows(); ++j) { + for(int j = k; j < opt.population.rows(); ++j) { double coeff = (parent_1[j] + parent_2[j])/2.0; parent_1[j] = coeff; parent_2[j] = coeff; diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h index d46ec0a7..87080009 100644 --- a/fdaPDE/src/optimization/genetic_optim.h +++ b/fdaPDE/src/optimization/genetic_optim.h @@ -40,8 +40,8 @@ template class GeneticOptim { static constexpr bool gradient_free = true; static constexpr int static_input_size = N; - std::vector population; - std::vector population_fitness; + matrix_t population; + vector_t population_fitness; std::mt19937 rng; vector_t x_curr; @@ -62,8 +62,6 @@ template class GeneticOptim { max_iter_(max_iter), tol_(tol), population_size_(population_size), - population(population_size, vector_t{}), - population_fitness(population_size, std::numeric_limits::max()), rng(seed) { fdapde_assert(population_size_ > 0); } @@ -94,22 +92,20 @@ template class GeneticOptim { vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... 
callbacks) { fdapde_static_assert( std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, - INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE); - fdapde_assert(population_size_ == population.size()); - fdapde_assert(population_size_ == population_fitness.size()); - + INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE + ); + std::tuple callbacks_ {callbacks...}; - + // whether or not the callbacks's stoping criteria are met bool stop = false; value_ = std::numeric_limits::max(); n_iter_ = 0; static_since_ = 0; - - // Initialize the state - for(int i = 0; i < population_size_; ++i) { - population[i] = x0; - } + population = x0.rowwise().replicate(population_size_); + population_fitness = vector_t::Zero(population_size_, 1); + fdapde_assert(population_size_ == population.cols()); + fdapde_assert(population_size_ == population_fitness.rows()); // Reset the state of all callbacks and sync their state with the // parameters in GeneticOptim @@ -122,7 +118,7 @@ template class GeneticOptim { while (n_iter_ < max_iter_ && !stop) { // Compute the fitness for selection for(int i = 0; i < population_size_; ++i) { - population_fitness[i] = objective(population[i]); + population_fitness(i) = objective(population.col(i)); } stop |= internals::exec_select_hooks(*this, callbacks_); @@ -131,12 +127,11 @@ template class GeneticOptim { // Compute argmax of the population fitness int current_best = 0; for(int i = 1; i < population_size_; ++i) { - if(population_fitness[i] < population_fitness[current_best]) + if(population_fitness(i) < population_fitness(current_best)) current_best = i; } - // TODO: use tolerance? - if( std::abs(value_ - population_fitness[current_best]) < tol_) { + if( std::abs(value_ - population_fitness(current_best)) < tol_) { ++static_since_; } @@ -146,8 +141,8 @@ template class GeneticOptim { // Update ++n_iter_; - value_ = population_fitness[current_best]; - optimum_ = population[current_best]; + value_ = population_fitness(current_best); + optimum_ = population.col(current_best); } return optimum_; diff --git a/fdaPDE/src/optimization/genetic_selection_ops.h b/fdaPDE/src/optimization/genetic_selection_ops.h index b2cae8b6..25a88d43 100644 --- a/fdaPDE/src/optimization/genetic_selection_ops.h +++ b/fdaPDE/src/optimization/genetic_selection_ops.h @@ -26,11 +26,12 @@ template class RankSelection { private: using vector_t = Eigen::Matrix; + using matrix_t = Eigen::Matrix; std::vector population_order_; std::uniform_real_distribution<> distribution_{0.0, 1.0}; std::vector cdf_; - std::vector< vector_t > new_population_; + matrix_t new_population_; /** * @brief Given a random number between 0 and 1, returns the index of selected rank with the probability of rank i \pi_i = (amax - (amax-amin)*(i-1)/(m-1))/m @@ -70,17 +71,21 @@ class RankSelection { template bool sync_hook(Opt& opt) { population_order_.clear(); cdf_.clear(); - new_population_.clear(); population_order_.reserve(opt.population.size()); - new_population_.reserve(opt.population.size()); + new_population_ = opt.population; cdf_.reserve(opt.population.size()); constexpr double amax = 1.2; constexpr double amin = 2.0 - amax; double probability_sum = 0.0; - for(int i = 0; i < opt.population.size(); ++i) { - double pi_i = (amax - (double)(amax - amin)*(i-1)/(double)(opt.population.size()-1))/(double)(opt.population.size()); + for(int i = 0; i < opt.population.cols(); ++i) { + double pi_i = ( + (amax - (double)(amax - amin)*(i-1) + / + 
(double)(opt.population.cols()-1))/(double)(opt.population.cols()) + ); + probability_sum += pi_i; cdf_.push_back(probability_sum); population_order_.push_back(i); @@ -93,14 +98,13 @@ class RankSelection { population_order_.begin(), population_order_.end(), [&](int a, int b) { - return opt.population_fitness[a] < opt.population_fitness[b]; + return opt.population_fitness(a) < opt.population_fitness(b); } ); - new_population_.clear(); - for(int i = 0; i < opt.population.size(); ++i) { + for(int i = 0; i < opt.population.cols(); ++i) { int selected = index_of_sample(distribution_(opt.rng)); - new_population_.push_back(opt.population[population_order_[selected]]); + new_population_.col(i) = (opt.population.col(population_order_[selected])); } opt.population = new_population_; @@ -120,26 +124,27 @@ class BinaryTournamentSelection { template bool sync_hook(Opt& opt) { fdapde_assert(opt.population.size() >= 2); - distribution_ = std::uniform_int_distribution{0,static_cast(opt.population.size())-1}; + distribution_ = std::uniform_int_distribution{0,static_cast(opt.population.cols())-1}; return false; } template bool select_hook(Opt& opt) { - fdapde_assert(distribution_.max() == opt.population.size() - 1); + fdapde_assert(distribution_.max() == opt.population.cols() - 1); + fdapde_assert(distribution_.min() == 0); // Perform binary tournament selection - for(int i = 0; i < opt.population.size(); i += 1) { + for(int i = 0; i < opt.population.cols(); i += 1) { int id_first = distribution_(opt.rng); int id_second = distribution_(opt.rng); if(id_first == id_second) continue; - double fit_first = opt.population_fitness[id_first]; - double fit_second = opt.population_fitness[id_second]; + double fit_first = opt.population_fitness(id_first); + double fit_second = opt.population_fitness(id_second); if(fit_first <= fit_second) { - opt.population[id_second] = opt.population[id_first]; + opt.population.col(id_second) = opt.population.col(id_first); } else { - opt.population[id_first] = opt.population[id_second]; + opt.population.col(id_first) = opt.population.col(id_second); } } return false; From eb1a86c84e8a677a453002ad5c30db593ee01d7d Mon Sep 17 00:00:00 2001 From: Sh3xe Date: Fri, 25 Jul 2025 20:33:33 +0200 Subject: [PATCH 20/22] fixed broken algorithms --- fdaPDE/src/optimization/conjugate_gradient.h | 4 +- .../src/optimization/genetic_mutation_ops.h | 20 +- fdaPDE/src/optimization/lbfgs.h | 248 +++++++++++---- fdaPDE/src/optimization/nelder_mead.h | 297 +++++++++++------- 4 files changed, 374 insertions(+), 195 deletions(-) diff --git a/fdaPDE/src/optimization/conjugate_gradient.h b/fdaPDE/src/optimization/conjugate_gradient.h index 10eeadd5..d7ff7326 100644 --- a/fdaPDE/src/optimization/conjugate_gradient.h +++ b/fdaPDE/src/optimization/conjugate_gradient.h @@ -100,11 +100,11 @@ template class conjugate_gradient_impl { }; struct fletcher_reeves_update { - template double operator()(const Opt& opt) { return opt.grad_new.norm() / opt.grad_old.norm(); } + template double operator()(const Opt& opt) { return opt.grad_new.norm() / opt.grad_old.dot(opt.grad_old); } }; struct polak_ribiere_update { template double operator()(const Opt& opt) { - return opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.norm(); + return opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.dot(opt.grad_old); } }; diff --git a/fdaPDE/src/optimization/genetic_mutation_ops.h b/fdaPDE/src/optimization/genetic_mutation_ops.h index 8f38af4c..943d23ee 100644 --- a/fdaPDE/src/optimization/genetic_mutation_ops.h +++ 
b/fdaPDE/src/optimization/genetic_mutation_ops.h @@ -57,26 +57,24 @@ class GaussianMutation { class CrossoverMutation { private: - std::uniform_int_distribution distribution_{0,10}; + std::uniform_real_distribution distribution_{0.0,1.0}; + double mutation_probability_ = 0.3; public: // constructors - CrossoverMutation() = default; + CrossoverMutation(double mutation_probability = 0.3) : mutation_probability_(mutation_probability) {} template bool sync_hook(Opt& opt) { - distribution_ = std::uniform_int_distribution(0, static_cast(opt.population.rows())); return false; } template bool mutate_hook(Opt& opt) { - for(int i = 0; i < opt.population.cols() - 1; i += 2) { - auto &parent_1 = opt.population.col(i); - auto &parent_2 = opt.population.col(i+1); - int k = distribution_(opt.rng); - for(int j = k; j < opt.population.rows(); ++j) { - double coeff = (parent_1[j] + parent_2[j])/2.0; - parent_1[j] = coeff; - parent_2[j] = coeff; + for(int j = 0; j < opt.population.cols() - 1; j += 2) + for(int i = 0; i < opt.population.rows(); ++i) { + if( distribution_(opt.rng) < mutation_probability_) { + double coeff = (opt.population(i, j) + opt.population(i, j+1))/2.0; + opt.population(i, j) = coeff; + opt.population(i, j+1) = coeff; } } return false; diff --git a/fdaPDE/src/optimization/lbfgs.h b/fdaPDE/src/optimization/lbfgs.h index 0e596afe..433e6f23 100644 --- a/fdaPDE/src/optimization/lbfgs.h +++ b/fdaPDE/src/optimization/lbfgs.h @@ -21,109 +21,233 @@ namespace fdapde { + +/** + * @brief Implementation of Broyden–Fletcher–Goldfarb–Shanno algorithm for unconstrained nonlinear optimization + * + * @tparam N dimension of the problem + * @tparam Args list of callbacks that will be called at different points of the algorithm. + */ template class LBFGS { - private: +private: using vector_t = std::conditional_t, Eigen::Matrix>; - using matrix_t = - std::conditional_t, Eigen::Matrix>; + using matrix_t = std::conditional_t, Eigen::Matrix>; vector_t optimum_; - double value_; // objective value at optimum - int n_iter_ = 0; // current iteration number - std::vector values_; // explored objective values during optimization - - int max_iter_; // maximum number of iterations before forced stop - double tol_; // tolerance on error before forced stop - double step_; // update step - int mem_size_ = 10; // number of vector used for approximating the objective hessian - Eigen::Matrix grad_mem_, x_mem_; - public: + double value_; // objective value at optimum + int n_iter_ = 0; // current iteration number + std::vector values_; // explored objective values during optimization + + int max_iter_; // maximum number of iterations before forced stop + double tol_; // tolerance on error before forced stop + double step_; // update step + int mem_size_ = 10; // number of vector used for the hessian approx + + Eigen::Matrix delta_grad_memory_, delta_x_memory_; + +public: static constexpr bool gradient_free = false; static constexpr int static_input_size = N; vector_t x_old, x_new, update, grad_old, grad_new; double h; + +private: + vector_t compute_hess_inv_grad_mul_(const vector_t &grad, double initial_coeff) { + vector_t q = grad; + + // If the iteration count is too low, then we cannot find [mem_size_] vectors, + // so we instead use the last [n_iter_] to compute the approximation of the hessian. + int current_memory = n_iter_ < mem_size_ ? 
n_iter_: mem_size_; + + Eigen::VectorXd alphas (current_memory); + alphas.setZero(); + + for(int i = 0; std::cmp_less(i, current_memory); ++i) { + int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; + vector_t delta_x = delta_x_memory_.col(k); + vector_t delta_grad = delta_grad_memory_.col(k); + + alphas(i) = delta_x.dot(q) / delta_grad.dot(delta_x); + q -= alphas(i) * delta_grad; + } + + // H_0^k (Our initial guess as the inverse hessian at x_old) is chosen to be + // an identity matrix times a constant anyways so we don't have to store any matrix + vector_t r = initial_coeff /* \times Identity \times */ * q; + + for(int i = current_memory - 1; i >= 0; --i) { + int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; + vector_t delta_x = delta_x_memory_.col(k); + vector_t delta_grad = delta_grad_memory_.col(k); + auto beta = delta_grad.dot(r) / delta_grad.dot(delta_x); + r += delta_x*(alphas(i) - beta); + } + + return r; + } + +public: // constructors - LBFGS() : max_iter_(500), tol_(1e-5), step_(1e-2) { } - LBFGS(int max_iter, double tol, double step, int mem_size) : - max_iter_(max_iter), tol_(tol), step_(step), mem_size_(mem_size) { + LBFGS(): + max_iter_(500), tol_(1e-5), step_(1e-2), mem_size_(5) { + } + + /** + * @brief Construct a new LBFGS instance + * + * @param max_iter maximum number of iterations performed + * @param memory_size size of the memory used to approximate the inverse hessian matrix in the LBFGH alg. + * @param tol tolerance used as the stoping criterion (|\nabla f_k|_2 < tol) + * @param step default step used, may be overwritten by the line search method + */ + LBFGS(int max_iter, double tol, double step, int memory_size) : + max_iter_(max_iter), + mem_size_(memory_size), + tol_(tol), step_(step){ fdapde_assert(mem_size_ >= 0); } + + // copy semantic LBFGS(const LBFGS& other) : - max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_), mem_size_(other.mem_size_) { } + mem_size_(other.mem_size_), + max_iter_(other.max_iter_), + n_iter_(other.n_iter_), + tol_(other.tol_), + delta_grad_memory_(other.delta_grad_memory_), + delta_x_memory_(other.delta_x_memory_), + value_(other.value_), + step_(other.step_), + optimum_(other.optimum_), + values_(other.values_) { + } + LBFGS& operator=(const LBFGS& other) { + mem_size_ = other.mem_size_; max_iter_ = other.max_iter_; + n_iter_ = other.n_iter_; tol_ = other.tol_; + delta_grad_memory_ = other.delta_grad_memory_; + delta_x_memory_ = other.delta_x_memory_; + value_ = other.value_; step_ = other.step_; - mem_size_ = other.mem_size_; + optimum_ = other.optimum_; + values_ = other.values_; return *this; } + + /** + * @brief Minimizes a function using the L-BFGS method + * + * @tparam ObjectiveT Type of the function to be minimized + * @tparam Functor Types of the callbacks + * @param objective function to be minimized + * @param x0 initial point + * @param func + */ template - vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) { + vector_t optimize( + ObjectiveT&& objective, + const vector_t& x0, + Callbacks&&... 
callbacks + ) { fdapde_static_assert( std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE); - constexpr double NaN = std::numeric_limits::quiet_NaN(); + + // whether or not the callbacks's stoping criteria are met + bool stop = false; + + // Callback management std::tuple callbacks_ {callbacks...}; - bool stop = false; // asserted true in case of forced stop - double error = std::numeric_limits::max(); - double gamma = 1.0; - int size = N == Dynamic ? x0.rows() : N; + + // Initialize the state + vector_t zero; + if constexpr (N == Dynamic) { + zero = vector_t::Zero(x0.rows()); + } else { + zero = vector_t::Zero(); + } + double error = 0; auto grad = objective.gradient(); - h = step_; n_iter_ = 0; - x_old = x0, x_new = vector_t::Constant(size, NaN); - grad_old = grad(x_old), grad_new = vector_t::Constant(size, NaN); - update = -grad_old; + update = zero; + h = step_; + x_old = x0, x_new = x0; + delta_x_memory_.resize(x0.rows(), mem_size_); + delta_grad_memory_.resize(x0.rows(), mem_size_); + + // Stationarity test + grad_old = grad(x_old); stop |= internals::exec_grad_hooks(*this, objective, callbacks_); + if (grad_old.isApprox(zero)) { + optimum_ = x_old; + value_ = objective(optimum_); + return optimum_; + } error = grad_old.norm(); - values_.push_back(objective(x_old)); - x_mem_.resize(x0.rows(), mem_size_); - grad_mem_.resize(x0.rows(), mem_size_); - + + // state to calculate the (inverse Hessian * grad) approx + double initial_coeff = 1.0; + while (n_iter_ < max_iter_ && error > tol_ && !stop) { + // Update along descent direction + update = -compute_hess_inv_grad_mul_(grad_old, initial_coeff); stop |= internals::exec_adapt_hooks(*this, objective, callbacks_); - // update along descent direction x_new = x_old + h * update; + + // Stationarity test grad_new = grad(x_new); - // prepare next iteration - // update inverse Hessian approximation - int col_idx = n_iter_ % mem_size_; - grad_mem_.col(col_idx) = grad_new - grad_old; - x_mem_.col(col_idx) = x_new - x_old; - gamma = x_mem_.col(col_idx).dot(grad_mem_.col(col_idx)) / grad_mem_.col(col_idx).norm(); - // compute update direction - vector_t q = grad_new; - n_iter_++; - int current_mem = n_iter_ < mem_size_ ? 
n_iter_: mem_size_; - std::vector alpha(current_mem, 0); - for (int i = 0; std::cmp_less(i, current_mem); ++i) { - int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; - alpha[i] = x_mem_.col(k).dot(q) / grad_mem_.col(k).dot(x_mem_.col(k)); - q -= alpha[i] * grad_mem_.col(k); - } - // H_0^k = I (initial guess of the inverse hessian) - update = -gamma * q; - for (int i = current_mem - 1; i >= 0; --i) { - int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; - double beta = grad_mem_.col(k).dot(update) / grad_mem_.col(k).dot(x_mem_.col(k)); - update -= x_mem_.col(k) * (alpha[i] + beta); + if (grad_new.isApprox(zero)) { + optimum_ = x_old; + value_ = objective(optimum_); + return optimum_; } - stop |= - (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + + // Update the memory + auto x_diff = x_new - x_old; + auto grad_diff = grad_new - grad_old; + + delta_grad_memory_.col(n_iter_ % mem_size_) = grad_diff; + delta_x_memory_.col(n_iter_ % mem_size_) = x_diff; + + // Calculate the inverse Hessian approximation + initial_coeff = x_diff.dot(grad_diff) / grad_diff.dot(grad_diff); + + // Prepare next iteration + stop |= (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + + error = grad_new.norm(); + values_.push_back(objective(x_old)); x_old = x_new; grad_old = grad_new; - error = grad_new.norm(); - values_.push_back(objective(x_old)); - } + ++n_iter_; + } + optimum_ = x_old; value_ = values_.back(); return optimum_; } - // observers + + /** + * @brief Returns the argmin of the last call to [optimize] + * + * @return vector_t + */ vector_t optimum() const { return optimum_; } + + /** + * @brief Returns the minimum of the last call to [optimize] + * + * @return double + */ double value() const { return value_; } + + /** + * @brief Returns the number of iterations performed by the method on the last call to [optimize] + * + * @return int + */ int n_iter() const { return n_iter_; } - const std::vector& values() const { return values_; } }; } // namespace fdapde diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h index 3de00be5..121ff8d8 100644 --- a/fdaPDE/src/optimization/nelder_mead.h +++ b/fdaPDE/src/optimization/nelder_mead.h @@ -21,171 +21,228 @@ namespace fdapde { +// Implementation of Nelder-Mead algorithm for gradient-free unconstrained nonlinear optimization template class NelderMead { - private: +private: using vector_t = std::conditional_t, Eigen::Matrix>; - using matrix_t = - std::conditional_t, Eigen::Matrix>; + using matrix_t = std::conditional_t, Eigen::Matrix>; - vector_t optimum_; + vector_t optimum_; // Argmin of the optimum double value_; // objective value at optimum int max_iter_; // maximum number of iterations before forced stop - double tol_; // tolerance on error before forced stop int n_iter_ = 0; // current iteration number - int seed_; // employed for random centroid perturbation - double alpha_; // reflexion coeff - double beta_; // expension coeff - double gamma_; // outer contraction coeff - double delta_; // inner contraction coeff - - Eigen::Matrix simplex_; // simplex vertices - std::vector vertices_values_; // objective values at simplex vertices - std::vector vertices_rank_; // simplex vertices sorted from lower to higher objective value - public: - vector_t x_curr; - double obj_curr; + double tol_; // tolerance on error before forced stop + + std::normal_distribution normal_dist_ {0.0, 1.0}; + std::mt19937 rng_; + + 
+   std::vector simplex_;            // vertices of the simplex
+   std::vector vertices_values_;    // objective value at each vertex
+   std::vector vertices_rank_;      // Index into the simplex vector, sorted from best to worst
+
+   double alpha_ = 0.0;   // Reflection coeff
+   double beta_ = 0.0;    // Expansion coeff
+   double gamma_ = 0.0;   // Outer contraction coeff
+   double delta_ = 0.0;   // Inner contraction coeff
+
+private:
+   // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search.
+   // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4
+   void init_simplex_(const vector_t &x0) {
+       const int dimension = x0.rows();
+       double infty_norm = x0.cwiseAbs().maxCoeff();
+       double scale_factor = std::min(std::max(infty_norm, 1.0), 10.0);
+       double last_coeff = scale_factor * (1.0 - (double)std::sqrt(dimension+1))/(double)(dimension);
+
+       // Initialise the simplex and its bookkeeping vectors
+       simplex_.clear();
+       vertices_rank_.clear();
+       vertices_values_.clear();
+       vertices_rank_.resize(dimension+1, 0);
+       vertices_values_.resize(dimension+1, std::numeric_limits::max());
+       simplex_.resize(dimension+1, x0);
+
+       for(int i = 0; i < dimension; ++i) {
+           simplex_[i][i] += scale_factor;
+           vertices_rank_[i+1] = i+1;
+           simplex_[dimension][i] += last_coeff;
+       }
+   }
+
+public:
    static constexpr bool gradient_free = true;
    static constexpr int static_input_size = N;
+   vector_t x_curr;
+   double obj_curr;
+
+public:
+   // constructors
    NelderMead() = default;
-   NelderMead(int max_iter, double tol, int seed = fdapde::random_seed) :
-       max_iter_(max_iter), tol_(tol), seed_(seed) { }
+
+   NelderMead(int max_iter, double tol, unsigned int seed = 0u):
+       max_iter_(max_iter),
+       tol_(tol),
+       rng_(seed){
+   }

    template
    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) {
        fdapde_static_assert(
          std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value,
          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE);
+       fdapde_assert(x0.rows() > 0);
+
        std::tuple callbacks_ {callbacks...};
-       double dims = x0.rows();
+
        bool stop = false;
-       bool shrink = false;
+       bool require_shrink = false;
        n_iter_ = 0;
-       vector_t centroid;
-       // adaptive parameters initialization
-       // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters. Comput Optim Appl
-       // 51, 259–277 (2012). https://doi.org/10.1007/s10589-010-9329-3
-       alpha_ = 1.0;
-       beta_ = 1.0 + 2. / dims;
-       gamma_ = 0.75 - 1. / (2 * dims);
-       delta_ = 1.0 - 1. / dims;
-       // simplex initialization
-       // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search. Optim Lett 13, 847–856
-       // (2019). https://doi.org/10.1007/s11590-018-1284-4
-       double c_h = std::min(std::max(x0.template lpNorm(), 1.0), 10.0);
-       simplex_ = x0.replicate(1, dims + 1);
-       simplex_.block(0, 0, dims, dims) += vector_t::Constant(dims, c_h).asDiagonal();
-       simplex_.col(dims).array() += c_h * (1.0 - std::sqrt(dims + 1)) / dims;
-       for (int i = 0; i < dims + 1; ++i) {
-           x_curr = simplex_.col(i);
-           obj_curr = objective(simplex_.col(i));
-           stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
-           vertices_values_.push_back(obj_curr);
+       vector_t zero;
+       const int dimension = x0.rows();
+       fdapde_assert(dimension >= 1);
+       if constexpr (N == Dynamic) {
+           zero = vector_t::Zero(x0.rows());
+       } else {
+           zero = vector_t::Zero();
        }
-       for (int i = 0; i < dims + 1; ++i) { vertices_rank_.push_back(i); }
-       // sort vertices according to their objective value
-       std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
-           return vertices_values_[a] < vertices_values_[b];
-       });
-
-       while (n_iter_ < max_iter_ && !stop) {
-           // centroid calculation (using the dims best vertices)
-           centroid.setZero();
-           for (int i = 0; i < dims; ++i) { centroid += simplex_.col(vertices_rank_[i]); }
-           centroid /= dims;
-           // Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for
-           // high-dimensional function optimization. Optim Lett 13, 1011–1025 (2019).
-           // https://doi.org/10.1007/s11590-018-1306-2
-           vector_t perturbed_centroid = centroid;
-           if (dims >= 10) {
-               vector_t random_delta = internals::gaussian_matrix(1, dims, 1.0, seed_);
-               random_delta /= random_delta.norm();
-               perturbed_centroid +=
-                 0.1 * random_delta * (simplex_.col(vertices_rank_[0]) - simplex_.col(vertices_rank_[dims])).norm();
+
+       // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters.
+       // Comput Optim Appl 51, 259–277 (2012). https://doi.org/10.1007/s10589-010-9329-3
+       alpha_ = 1.0;
+       beta_ = 1 + 2.0/(double)dimension;
+       gamma_ = 0.75- 1.0/(double)(2*dimension);
+       delta_ = 1.0 - 1.0/(double)dimension;
+
+       // Initialise the simplex given x0
+       init_simplex_(x0);
+
+       fdapde_assert(vertices_values_.size() == simplex_.size());
+       fdapde_assert(vertices_rank_.size() == simplex_.size());
+       fdapde_assert(x0.rows()+1 == simplex_.size());
+
+       // Compute the vertices' values
+       for(int i = 0; i < simplex_.size(); ++i)
+           vertices_values_[i] = objective(simplex_[i]);
+
+       while(!stop && n_iter_ < max_iter_) {
+           // Sort the vertices according to their objective value
+           std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
+               return vertices_values_[a] < vertices_values_[b];
+           });
+
+           // Centroid calculation
+           vector_t centroid = zero;   // centroid of the [dimension] best vertices
+           vector_t random_vect = zero;
+           for(int i = 0; i < dimension; ++i) {
+               centroid += simplex_[vertices_rank_[i]];
+               if(dimension >= 10) {
+                   random_vect[i] = normal_dist_(rng_);
+               }
            }
+           centroid /= (double)(dimension);
+           if(dimension >= 10) {
+               random_vect /= random_vect.norm();
+           }
+
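+           // Once normalized, a vector of i.i.d. standard Gaussians points in a
+           // direction uniformly distributed on the unit sphere, so random_vect is
+           // an isotropic random direction; it is only built for dimension >= 10,
+           // where the centroid perturbation below is worthwhile.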
+           // Perturbation of the centroid to enhance performance for large dimensions
+           // Optim Lett 13, 1011–1025 (2019). https://doi.org/10.1007/s11590-018-1306-2
+           vector_t perturbed_centroid = centroid;
+           if(dimension >= 10) {
+               perturbed_centroid += 0.1 * random_vect * (simplex_[vertices_rank_[0]] - simplex_[vertices_rank_[dimension]]).norm();
+           }
-           vector_t xr = perturbed_centroid + alpha_ * (perturbed_centroid - simplex_.col(vertices_rank_[dims]));
-
-           // simplex update
-           shrink = false;
-           double xr_val = objective(xr);   // objective value at the reflected point
-           double fb_val = vertices_values_[vertices_rank_[0]];
-           double fw_val = vertices_values_[vertices_rank_[dims]];
-           double sw_val = vertices_values_[vertices_rank_[dims - 1]];
-           x_curr = xr;
-           obj_curr = xr_val;
+           vector_t xr = perturbed_centroid + alpha_*(perturbed_centroid - simplex_[vertices_rank_[dimension]]);
+
+           // Cache the values used for the if statements
+           require_shrink = false;
+           const double xr_val = objective(xr);
+           const double best_val = vertices_values_[vertices_rank_[0]];
+           const double worst_val = vertices_values_[vertices_rank_[dimension]];
+           const double second_worst_val = vertices_values_[vertices_rank_[dimension-1]];
+
            stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
-
-           if (fb_val <= xr_val && xr_val < sw_val) {
-               // reflection
-               simplex_.col(vertices_rank_[dims]) = xr;
-               vertices_values_[vertices_rank_[dims]] = xr_val;
-           } else if (xr_val < fb_val) {
-               // expansion
-               vector_t xe = perturbed_centroid + beta_ * (xr - perturbed_centroid);
+
+           // Compute the new simplex
+           if( best_val <= xr_val && xr_val < second_worst_val ) {
+               // Reflection
+               simplex_[vertices_rank_[dimension]] = xr;
+               vertices_values_[vertices_rank_[dimension]] = xr_val;
+           } else if( xr_val < best_val ) {
+               // Expansion
+               vector_t xe = perturbed_centroid + beta_*(xr - perturbed_centroid);
                double xe_val = objective(xe);
-               x_curr = xe;
-               obj_curr = xe_val;
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
-               simplex_.col(vertices_rank_[dims]) = xe_val < xr_val ? xe : xr;
-               vertices_values_[vertices_rank_[dims]] = xe_val < xr_val ? xe_val : xr_val;
-           } else if (sw_val <= xr_val && xr_val < fw_val) {
-               // outer contraction
+               if(xe_val < xr_val) {
+                   simplex_[vertices_rank_[dimension]] = xe;
+                   vertices_values_[vertices_rank_[dimension]] = xe_val;
+               } else {
+                   simplex_[vertices_rank_[dimension]] = xr;
+                   vertices_values_[vertices_rank_[dimension]] = xr_val;
+               }
+           } else if( second_worst_val <= xr_val && xr_val < worst_val ) {
+               // Outer Contraction
                vector_t xoc = centroid + gamma_ * (xr - centroid);
                double xoc_val = objective(xoc);
-               x_curr = xoc;
-               obj_curr = xoc_val;
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
                if (xoc_val <= xr_val) {
-                   simplex_.col(vertices_rank_[dims]) = xoc;
-                   vertices_values_[vertices_rank_[dims]] = xoc_val;
+                   simplex_[vertices_rank_[dimension]] = xoc;
+                   vertices_values_[vertices_rank_[dimension]] = xoc_val;
                } else {
-                   shrink = true;
+                   require_shrink = true;
                }
            } else {
-               // inner contraction
+               // Inner Contraction
                vector_t xic = centroid - gamma_ * (xr - centroid);
                double xic_val = objective(xic);
-               x_curr = xic;
-               obj_curr = xic_val;
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
-               if (xic_val < fb_val) {
-                   simplex_.col(vertices_rank_[dims]) = xic;
-                   vertices_values_[vertices_rank_[dims]] = xic_val;
+               if( xic_val < best_val) {
+                   simplex_[vertices_rank_[dimension]] = xic;
+                   vertices_values_[vertices_rank_[dimension]] = xic_val;
                } else {
-                   shrink = true;
+                   require_shrink = true;
                }
            }
-           // shrink
-           if (shrink) {
-               auto best_vertex = simplex_.col(vertices_rank_[0]);
-               for (int i = 1; i < dims + 1; ++i) {
-                   simplex_.col(vertices_rank_[i]) =
-                     best_vertex + delta_ * (simplex_.col(vertices_rank_[i]) - best_vertex);
+
+           // Shrink
+           if(require_shrink) {
+               const vector_t &best_vertex = simplex_[vertices_rank_[0]];
+               for(int i = 1; i < dimension + 1; ++i) {
+                   simplex_[vertices_rank_[i]] = best_vertex + delta_ * (simplex_[vertices_rank_[i]] - best_vertex);
                }
            }
-           // prepare next iteration
-           double mean = std::accumulate(vertices_values_.begin(), vertices_values_.end(), 0.0) / (dims + 1);
-           double std_dev =
-             std::accumulate(vertices_values_.begin(), vertices_values_.end(), 0.0, [&](double a, double b) {
-                 return a + (std::pow(b - mean, 2));
-             });
-           std_dev = std::sqrt(std_dev / dims);
-           stop = std_dev < tol_;
-           // sort vertices according to their objective value
-           std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
-               return vertices_values_[a] < vertices_values_[b];
-           });
-           stop |= internals::exec_stop_if(*this, objective);
-           n_iter_++;
+           // Stopping criterion
+           stop |= internals::exec_stop_if(*this, objective);
+           double value_mean = 0.0;
+           for(double val: vertices_values_) {
+               value_mean += val;
+           }
+           value_mean /= (double)vertices_values_.size();
+
+           double std_dev = 0.0;
+           for(double val: vertices_values_) {
+               double x = val - value_mean;
+               std_dev += x*x;
+           }
+           std_dev /= (double)(dimension);
+           std_dev = std::sqrt(std_dev);
+
+           if(std_dev <= tol_) {
+               stop = true;
+           }
+
+           ++n_iter_;
        }
-       optimum_ = simplex_.col(vertices_rank_[0]);
+
+       optimum_ = simplex_[0];
        value_ = objective(optimum_);
        return optimum_;
    }
-   // getters
-   const vector_t& optimum() const { return optimum_; }
+
+   vector_t optimum() const { return optimum_; }
    double value() const { return value_; }
    int n_iter() const { return n_iter_; }
};

From c2aff9831d6cdbbbc0961219aef242fc49f074be Mon Sep 17 00:00:00 2001
From: Sh3xe 
Date: Mon, 28 Jul 2025 18:13:10 +0200
Subject: [PATCH 21/22] updated nelder mead, fixed static_since var, added some const(s)

---
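Usage sketch for the reworked optimizer (illustrative only: the constructor
and optimize() signatures come from this patch, while the include path and
the fdapde::Dynamic spelling are assumptions):

    #include <Eigen/Dense>
    #include <fdaPDE/optimization.h>   // hypothetical umbrella header

    // f(x) = ||x - (1, 2)||^2, minimized at (1, 2)
    struct Quadratic {
        double operator()(const Eigen::Matrix<double, Eigen::Dynamic, 1>& x) const {
            Eigen::Vector2d c(1.0, 2.0);
            return (x - c).squaredNorm();
        }
    };

    int main() {
        fdapde::NelderMead<fdapde::Dynamic> opt(500, 1e-6);   // max_iter, tol
        Eigen::VectorXd x0 = Eigen::VectorXd::Zero(2);
        Eigen::VectorXd xmin = opt.optimize(Quadratic {}, x0);   // ~ (1, 2)
        return 0;
    }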
 .../src/optimization/genetic_mutation_ops.h   |   3 +-
 fdaPDE/src/optimization/genetic_optim.h       |  13 +-
 .../src/optimization/genetic_selection_ops.h  |  22 +--
 fdaPDE/src/optimization/nelder_mead.h         | 177 ++++++++----------
 fdaPDE/src/optimization/wolfe.h               |   6 +-
 5 files changed, 98 insertions(+), 123 deletions(-)

diff --git a/fdaPDE/src/optimization/genetic_mutation_ops.h b/fdaPDE/src/optimization/genetic_mutation_ops.h
index 943d23ee..4124acfa 100644
--- a/fdaPDE/src/optimization/genetic_mutation_ops.h
+++ b/fdaPDE/src/optimization/genetic_mutation_ops.h
@@ -38,7 +38,7 @@ class GaussianMutation {
    {}

    template bool sync_hook(Opt& opt) {
-       variance_ = initial_variance_;
+       variance_ = initial_variance_ * opt.population.rowwise().norm().mean();
        return false;
    }

@@ -49,7 +49,6 @@ class GaussianMutation {
                opt.population(i,j) += normal_dist_(opt.rng) * variance_;
        }

-       variance_ *= multiplier_;
        return false;
    }

diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h
index 87080009..c1421679 100644
--- a/fdaPDE/src/optimization/genetic_optim.h
+++ b/fdaPDE/src/optimization/genetic_optim.h
@@ -114,15 +114,20 @@ template class GeneticOptim {
        // In the case of this algorithm, post_update... is the mutation process
        // so we apply it before the main loop
        stop |= internals::exec_mutate_hooks(*this, callbacks_);
+
+       // Compute the fitness for selection
+       for(int i = 0; i < population_size_; ++i) {
+           population_fitness(i) = objective(population.col(i));
+       }

        while (n_iter_ < max_iter_ && !stop) {
+           stop |= internals::exec_select_hooks(*this, callbacks_);
+           stop |= internals::exec_mutate_hooks(*this, callbacks_);
+
            // Compute the fitness for selection
            for(int i = 0; i < population_size_; ++i) {
                population_fitness(i) = objective(population.col(i));
            }
-
-           stop |= internals::exec_select_hooks(*this, callbacks_);
-           stop |= internals::exec_mutate_hooks(*this, callbacks_);

            // Compute argmax of the population fitness
            int current_best = 0;
@@ -133,6 +138,8 @@ template class GeneticOptim {

            if( std::abs(value_ - population_fitness(current_best)) < tol_) {
                ++static_since_;
+           } else {
+               static_since_ = 0;
            }

            // Stopping condition: haven't changed for a while
diff --git a/fdaPDE/src/optimization/genetic_selection_ops.h b/fdaPDE/src/optimization/genetic_selection_ops.h
index 25a88d43..60debc10 100644
--- a/fdaPDE/src/optimization/genetic_selection_ops.h
+++ b/fdaPDE/src/optimization/genetic_selection_ops.h
@@ -22,11 +22,10 @@

 namespace fdapde {

-template
 class RankSelection {
  private:
-   using vector_t = Eigen::Matrix;
-   using matrix_t = Eigen::Matrix;
+   using vector_t = Eigen::Matrix;
+   using matrix_t = Eigen::Matrix;

    std::vector population_order_;
    std::uniform_real_distribution<> distribution_{0.0, 1.0};
@@ -39,7 +38,7 @@ class RankSelection {
     * @param sample random uniform sample between 0 and 1
     * @return int the rank associated with the sample
     */
-   int index_of_sample(double sample) {
+   int index_of_sample(double sample) const {
        int upper_bound = cdf_.size() - 1;
        int lower_bound = 0;
@@ -72,21 +71,16 @@ class RankSelection {
        population_order_.clear();
        cdf_.clear();
-       population_order_.reserve(opt.population.size());
+       population_order_.reserve(opt.population.cols());
        new_population_ = opt.population;
-       cdf_.reserve(opt.population.size());
+       cdf_.reserve(opt.population.cols());

-       constexpr double amax = 1.2;
-       constexpr double amin = 2.0 - amax;
+       const double denom = (opt.population.cols()+1)*(opt.population.cols()) * 0.5;   // N(N+1)/2
        double probability_sum = 0.0;
        for(int i = 0; i < opt.population.cols(); ++i) {
-           double pi_i = (
-             (amax - (double)(amax - amin)*(i-1)
-             /
-             (double)(opt.population.cols()-1))/(double)(opt.population.cols())
-           );
-
+           double pi_i = (opt.population.cols() - i) / denom;
            probability_sum += pi_i;
+
            cdf_.push_back(probability_sum);
            population_order_.push_back(i);
        }
diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h
index 121ff8d8..102b9ee1 100644
--- a/fdaPDE/src/optimization/nelder_mead.h
+++ b/fdaPDE/src/optimization/nelder_mead.h
@@ -27,50 +27,21 @@ template class NelderMead {
    using vector_t = std::conditional_t, Eigen::Matrix>;
    using matrix_t = std::conditional_t, Eigen::Matrix>;

-   vector_t optimum_;               // argmin of the objective found by the last call to optimize()
-   double value_;                   // objective value at optimum
-   int max_iter_;                   // maximum number of iterations before forced stop
-   int n_iter_ = 0;                 // current iteration number
-   double tol_;                     // tolerance on error before forced stop
-
-   std::normal_distribution normal_dist_ {0.0, 1.0};
-   std::mt19937 rng_;
-
-   std::vector simplex_;            // vertices of the simplex
-   std::vector vertices_values_;    // objective value at each vertex
-   std::vector vertices_rank_;      // Index into the simplex vector, sorted from best to worst
+   vector_t optimum_;               // argmin of the objective found by the last call to optimize()
+   double value_;                   // objective value at optimum
+   int max_iter_ = 500;             // maximum number of iterations before forced stop
+   int n_iter_ = 0;                 // current iteration number
+   double tol_ = 1e-5;              // tolerance on error before forced stop
+
+   matrix_t simplex_;               // vertices of the simplex
+   vector_t vertices_values_;       // objective value at each vertex
+   std::vector vertices_rank_;      // Index into the simplex vector, sorted from best to worst

    double alpha_ = 0.0;   // Reflection coeff
    double beta_ = 0.0;    // Expansion coeff
    double gamma_ = 0.0;   // Outer contraction coeff
    double delta_ = 0.0;   // Inner contraction coeff

-private:
-   // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search.
-   // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4
-   void init_simplex_(const vector_t &x0) {
-       const int dimension = x0.rows();
-       double infty_norm = x0.cwiseAbs().maxCoeff();
-       double scale_factor = std::min(std::max(infty_norm, 1.0), 10.0);
-       double last_coeff = scale_factor * (1.0 - (double)std::sqrt(dimension+1))/(double)(dimension);
-
-       // Initialise the simplex and its bookkeeping vectors
-       simplex_.clear();
-       vertices_rank_.clear();
-       vertices_values_.clear();
-       vertices_rank_.resize(dimension+1, 0);
-       vertices_values_.resize(dimension+1, std::numeric_limits::max());
-       simplex_.resize(dimension+1, x0);
-
-       for(int i = 0; i < dimension; ++i) {
-           simplex_[i][i] += scale_factor;
-           vertices_rank_[i+1] = i+1;
-           simplex_[dimension][i] += last_coeff;
-       }
-   }
-
 public:
    static constexpr bool gradient_free = true;
    static constexpr int static_input_size = N;
    vector_t x_curr;
    double obj_curr;

 public:
    NelderMead() = default;

-   NelderMead(int max_iter, double tol, unsigned int seed = 0u):
-       max_iter_(max_iter),
-       tol_(tol),
-       rng_(seed){
-   }
+   NelderMead(int max_iter, double tol): max_iter_(max_iter), tol_(tol)
+   {}

    template
    vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) {
        fdapde_static_assert(
          std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value,
          INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE);
        fdapde_assert(x0.rows() > 0);

        std::tuple callbacks_ {callbacks...};

        bool stop = false;
        bool require_shrink = false;
        n_iter_ = 0;
        vector_t zero;
        const int dimension = x0.rows();
        fdapde_assert(dimension >= 1);
        if constexpr (N == Dynamic) {
            zero = vector_t::Zero(x0.rows());
        } else {
            zero = vector_t::Zero();
        }
+       vector_t centroid = zero;   // centroid
+       vector_t xr = zero;         // initial next guess
+       vector_t tmp = zero;        // temporary vec for inner, outer contractions, etc.
+
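+       // Sanity check for the adaptive coefficients set below: for dimension == 2
+       // they reduce to the classic Nelder-Mead values
+       //   alpha = 1, beta = 1 + 2/2 = 2, gamma = 0.75 - 1/4 = 0.5, delta = 1 - 1/2 = 0.5,
+       // while for large dimensions beta -> 1, gamma -> 0.75 and delta -> 1,
+       // i.e. milder expansion, contraction and shrinkage.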
        // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters.
        // Comput Optim Appl 51, 259–277 (2012). https://doi.org/10.1007/s10589-010-9329-3
        alpha_ = 1.0;
-       beta_ = 1 + 2.0/(double)dimension;
-       gamma_ = 0.75- 1.0/(double)(2*dimension);
+       beta_  = 1 + 2.0/(double)dimension;
+       gamma_ = 0.75 - 1.0/(double)(2*dimension);
        delta_ = 1.0 - 1.0/(double)dimension;

        // Initialise the simplex given x0
-       init_simplex_(x0);
+       // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search.
+       // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4
+       {
+           double infty_norm = x0.cwiseAbs().maxCoeff();
+           double scale_factor = std::min(std::max(infty_norm, 1.0), 10.0);
+           double last_coeff = scale_factor * (1.0 - (double)std::sqrt(dimension+1))/(double)(dimension);
+
+           // Initialise the simplex and its bookkeeping vectors
+           simplex_ = x0.rowwise().replicate(dimension+1);
+           vertices_values_ = vector_t::Constant(dimension+1, 1, std::numeric_limits::max());
+           vertices_rank_.clear();
+           vertices_rank_.resize(dimension+1, 0);

-       fdapde_assert(vertices_values_.size() == simplex_.size());
-       fdapde_assert(vertices_rank_.size() == simplex_.size());
-       fdapde_assert(x0.rows()+1 == simplex_.size());
+           for(int i = 0; i < dimension; ++i) {
+               simplex_(i, i) += scale_factor;
+               vertices_rank_[i+1] = i+1;
+               simplex_(i, dimension) += last_coeff;
+           }
+       }
+
+       fdapde_assert(vertices_values_.size() == simplex_.cols());
+       fdapde_assert(vertices_rank_.size() == simplex_.cols());
+       fdapde_assert(x0.rows()+1 == simplex_.cols());

        // Compute the vertices' values
-       for(int i = 0; i < simplex_.size(); ++i)
-           vertices_values_[i] = objective(simplex_[i]);
+       for(int i = 0; i < simplex_.cols(); ++i)
+           vertices_values_[i] = objective(simplex_.col(i));
+
+       // Sort the vertices according to their objective value
+       std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
+           return vertices_values_[a] < vertices_values_[b];
+       });

        while(!stop && n_iter_ < max_iter_) {
-           // Sort the vertices according to their objective value
-           std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
-               return vertices_values_[a] < vertices_values_[b];
-           });
-
-           // Centroid calculation
-           vector_t centroid = zero;   // centroid of the [dimension] best vertices
-           vector_t random_vect = zero;
+           // Centroid calculation
+           centroid.setZero();   // centroid of the [dimension] best vertices
            for(int i = 0; i < dimension; ++i) {
-               centroid += simplex_[vertices_rank_[i]];
-               if(dimension >= 10) {
-                   random_vect[i] = normal_dist_(rng_);
-               }
+               centroid += simplex_.col(vertices_rank_[i]);
            }
            centroid /= (double)(dimension);
-           if(dimension >= 10) {
-               random_vect /= random_vect.norm();
-           }
-
-           // Perturbation of the centroid to enhance performance for large dimensions
-           // Optim Lett 13, 1011–1025 (2019). https://doi.org/10.1007/s11590-018-1306-2
-           vector_t perturbed_centroid = centroid;
-           if(dimension >= 10) {
-               perturbed_centroid += 0.1 * random_vect * (simplex_[vertices_rank_[0]] - simplex_[vertices_rank_[dimension]]).norm();
-           }
-           vector_t xr = perturbed_centroid + alpha_*(perturbed_centroid - simplex_[vertices_rank_[dimension]]);
+           xr = centroid + alpha_*(centroid - simplex_.col(vertices_rank_[dimension]));

            // Cache the values used for the if statements
            require_shrink = false;
            const double xr_val = objective(xr);
            const double best_val = vertices_values_[vertices_rank_[0]];
            const double worst_val = vertices_values_[vertices_rank_[dimension]];
            const double second_worst_val = vertices_values_[vertices_rank_[dimension-1]];

            stop |= internals::exec_eval_hooks(*this, objective, callbacks_);

            // Compute the new simplex
            if( best_val <= xr_val && xr_val < second_worst_val ) {
                // Reflection
-               simplex_[vertices_rank_[dimension]] = xr;
+               simplex_.col(vertices_rank_[dimension]) = xr;
                vertices_values_[vertices_rank_[dimension]] = xr_val;
            } else if( xr_val < best_val ) {
                // Expansion
-               vector_t xe = perturbed_centroid + beta_*(xr - perturbed_centroid);
-               double xe_val = objective(xe);
+               tmp = centroid + beta_*(xr - centroid);
+               double xe_val = objective(tmp);
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
                if(xe_val < xr_val) {
-                   simplex_[vertices_rank_[dimension]] = xe;
+                   simplex_.col(vertices_rank_[dimension]) = tmp;
                    vertices_values_[vertices_rank_[dimension]] = xe_val;
                } else {
-                   simplex_[vertices_rank_[dimension]] = xr;
+                   simplex_.col(vertices_rank_[dimension]) = xr;
                    vertices_values_[vertices_rank_[dimension]] = xr_val;
                }
            } else if( second_worst_val <= xr_val && xr_val < worst_val ) {
                // Outer Contraction
-               vector_t xoc = centroid + gamma_ * (xr - centroid);
-               double xoc_val = objective(xoc);
+               tmp = centroid + gamma_ * (xr - centroid);
+               double xoc_val = objective(tmp);
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
                if (xoc_val <= xr_val) {
-                   simplex_[vertices_rank_[dimension]] = xoc;
+                   simplex_.col(vertices_rank_[dimension]) = tmp;
                    vertices_values_[vertices_rank_[dimension]] = xoc_val;
                } else {
                    require_shrink = true;
                }
            } else {
                // Inner Contraction
-               vector_t xic = centroid - gamma_ * (xr - centroid);
-               double xic_val = objective(xic);
+               tmp = centroid - gamma_ * (xr - centroid);
+               double xic_val = objective(tmp);
                stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
                if( xic_val < best_val) {
-                   simplex_[vertices_rank_[dimension]] = xic;
+                   simplex_.col(vertices_rank_[dimension]) = tmp;
                    vertices_values_[vertices_rank_[dimension]] = xic_val;
                } else {
                    require_shrink = true;
                }
            }

            // Shrink
            if(require_shrink) {
-               const vector_t &best_vertex = simplex_[vertices_rank_[0]];
+               const auto &best_vertex = simplex_.col(vertices_rank_[0]);
                for(int i = 1; i < dimension + 1; ++i) {
-                   simplex_[vertices_rank_[i]] = best_vertex + delta_ * (simplex_[vertices_rank_[i]] - best_vertex);
+                   simplex_.col(vertices_rank_[i]) = best_vertex + delta_ * (simplex_.col(vertices_rank_[i]) - best_vertex);
+                   vertices_values_[vertices_rank_[i]] = objective(simplex_.col(vertices_rank_[i]));
                }
            }

+           // Sort the vertices according to their objective value
+           std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
+               return vertices_values_[a] < vertices_values_[b];
+           });
+
            // Stopping criterion
            stop |= internals::exec_stop_if(*this, objective);
-           double value_mean = 0.0;
-           for(double val: vertices_values_) {
-               value_mean += val;
-           }
-           value_mean /= (double)vertices_values_.size();

-           double std_dev = 0.0;
+           // I did not use std::accumulate because I believe that it can become
+           // very slow depending on the container & formula
+           const double value_mean = vertices_values_.mean();
+           double value_var = 0.0;
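+           // Flat-simplex stopping test in the spirit of Nelder & Mead: stop once
+           // the standard deviation of the vertex values,
+           //   sqrt( sum_i (f_i - value_mean)^2 / (dimension + 1) ),
+           // drops to tol_ or below (vertices_values_ holds dimension + 1 entries).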
            for(double val: vertices_values_) {
-               double x = val - value_mean;
-               std_dev += x*x;
-           }
-           std_dev /= (double)(dimension);
-           std_dev = std::sqrt(std_dev);
-
-           if(std_dev <= tol_) {
-               stop = true;
+               const double x = val - value_mean;
+               value_var += x*x;
            }
+           value_var /= (double)vertices_values_.rows();
+           stop |= std::sqrt(value_var) <= tol_;

            ++n_iter_;
        }

-       optimum_ = simplex_[0];
+       optimum_ = simplex_.col(vertices_rank_[0]);
        value_ = objective(optimum_);
        return optimum_;
    }
diff --git a/fdaPDE/src/optimization/wolfe.h b/fdaPDE/src/optimization/wolfe.h
index be861918..c06c7cf3 100644
--- a/fdaPDE/src/optimization/wolfe.h
+++ b/fdaPDE/src/optimization/wolfe.h
@@ -34,14 +34,14 @@ class WolfeLineSearch {
    WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }

    // bisection method for the weak Wolfe conditions. check "Jorge Nocedal, Stephen J. Wright (2006), Numerical
-   // Optimization, pag 58"
+   // Optimization, page 58"
    template bool adapt_hook(Opt& opt, Obj& obj) {
        fdapde_static_assert(is_gradient_based_opt_v, THIS_METHOD_IS_FOR_GRADIENT_BASED_OPTIMIZATION_ONLY);
-       double grad_0 = opt.grad_old.dot(opt.update), val_0 = obj(opt.x_old);
+       const double grad_0 = opt.grad_old.dot(opt.update), val_0 = obj(opt.x_old);
        auto grad = obj.gradient();
        bool zooming = false;
-       double c1 = c1_, c2 = c2_;
+       const double c1 = c1_, c2 = c2_;
        double alpha_max = alpha_max_, alpha_min = alpha_min_;
        double alpha_prev = 0;
        double alpha_curr = alpha_;

From b625741b5071fbe64cf55983318694467b18e02a Mon Sep 17 00:00:00 2001
From: Sh3xe 
Date: Mon, 11 Aug 2025 11:42:44 +0200
Subject: [PATCH 22/22] Updated CG, increased max iter for wolfe.h, fixed issue with lbfgs value update

---
 fdaPDE/src/optimization/bfgs.h                |  7 +--
 fdaPDE/src/optimization/conjugate_gradient.h  | 53 +++++++++++++++----
 .../src/optimization/genetic_mutation_ops.h   | 25 ++++-----
 fdaPDE/src/optimization/genetic_optim.h       | 10 ++--
 fdaPDE/src/optimization/lbfgs.h               |  5 +-
 fdaPDE/src/optimization/nelder_mead.h         |  5 +-
 fdaPDE/src/optimization/wolfe.h               |  8 +--
 7 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/fdaPDE/src/optimization/bfgs.h b/fdaPDE/src/optimization/bfgs.h
index 141bc419..4bd769cf 100644
--- a/fdaPDE/src/optimization/bfgs.h
+++ b/fdaPDE/src/optimization/bfgs.h
@@ -70,9 +70,10 @@ template class BFGS {
            inv_hessian_ = matrix_t::Identity(size, size);
        } else {
            inv_hessian_ = matrix_t::Identity();
-       }
-       update = -inv_hessian_ * grad_old;
-       stop |= internals::exec_grad_hooks(*this, objective, callbacks_);
+       }
+       values_.clear();
+       update = -inv_hessian_ * grad_old;
+       stop |= internals::exec_grad_hooks(*this, objective, callbacks_);
        error = grad_old.norm();
        values_.push_back(objective(x_old));

diff --git a/fdaPDE/src/optimization/conjugate_gradient.h b/fdaPDE/src/optimization/conjugate_gradient.h
index d7ff7326..9fcf3606 100644
--- a/fdaPDE/src/optimization/conjugate_gradient.h
+++ b/fdaPDE/src/optimization/conjugate_gradient.h
@@ -36,20 +36,22 @@ template class conjugate_gradient_impl {
    int max_iter_;   // maximum number of iterations before forced stop
    double tol_;     // tolerance on error before forced stop
    double step_;    // update step
+   bool restart_;
  public:
    static constexpr bool gradient_free = false;
    static constexpr int static_input_size = N;
    vector_t x_old, x_new, update, grad_old, grad_new;
    double h;
    // constructor
-   conjugate_gradient_impl() : max_iter_(500), tol_(1e-5), step_(1e-2) { }
-   conjugate_gradient_impl(int max_iter, double tol, double step) : max_iter_(max_iter), tol_(tol), step_(step) { }
+   conjugate_gradient_impl() : max_iter_(500), tol_(1e-5), step_(1e-2), restart_(true) { }
+   conjugate_gradient_impl(int max_iter, double tol, double step, bool restart) : max_iter_(max_iter), tol_(tol), step_(step), restart_(restart) { }
    conjugate_gradient_impl(const conjugate_gradient_impl& other) :
-     max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_) { }
+     max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_), restart_(other.restart_) { }
    conjugate_gradient_impl& operator=(const conjugate_gradient_impl& other) {
        max_iter_ = other.max_iter_;
        tol_ = other.tol_;
        step_ = other.step_;
+       restart_ = other.restart_;
        return *this;
    }
    template
@@ -66,6 +68,7 @@ template class conjugate_gradient_impl {
        auto grad = objective.gradient();
        h = step_;
        n_iter_ = 0;
+       value_ = std::numeric_limits::max();
        x_old = x0, x_new = vector_t::Constant(size, NaN);
        grad_old = grad(x_old), grad_new = vector_t::Constant(size, NaN);
        update = -grad_old;
@@ -75,21 +78,36 @@ template class conjugate_gradient_impl {
        while (n_iter_ < max_iter_ && error > tol_ && !stop) {
            stop |= internals::exec_adapt_hooks(*this, objective, callbacks_);
+
            // update along descent direction
            x_new = x_old + h * update;
            grad_new = grad(x_new);
            // prepare next iteration
-           update = -grad_new + std::max(0.0, beta(*this)) * update;   // update conjugate direction
+           if(restart_ && std::abs(grad_new.dot(grad_old)) / grad_new.dot(grad_new) >= 0.1 ) {
+               update = -grad_new;
+           } else {
+               update = -grad_new + std::max(0.0, beta(*this)) * update;   // update conjugate direction
+           }
+
            stop |=
-             (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective));
+             (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective));
+
            x_old = x_new;
            grad_old = grad_new;
            error = grad_new.norm();
            values_.push_back(objective(x_old));
            n_iter_++;
+
+           if(values_.back() <= value_) {
+               value_ = values_.back();
+               optimum_ = x_old;
+           }
+       }
+
+       if(values_.back() <= value_) {
+           value_ = values_.back();
+           optimum_ = x_old;
        }
-       optimum_ = x_old;
-       value_ = values_.back();
        return optimum_;
    }
    // observers
@@ -102,12 +120,19 @@

struct fletcher_reeves_update {
    template double operator()(const Opt& opt) { return opt.grad_new.dot(opt.grad_new) / opt.grad_old.dot(opt.grad_old); }
};
+
struct polak_ribiere_update {
    template double operator()(const Opt& opt) {
        return opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.dot(opt.grad_old);
    }
};

+struct polak_ribiere_plus_update {
+   template double operator()(const Opt& opt) {
+       return std::max(opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.dot(opt.grad_old), 0.0);
+   }
+};
+
}   // namespace internals

// public CG optimizers
@@ -116,16 +141,26 @@
class FletcherReevesCG : public internals::conjugate_gradient_impl {
    using Base = internals::conjugate_gradient_impl;
  public:
    FletcherReevesCG() = default;
-   FletcherReevesCG(int max_iter, double tol, double step) : Base(max_iter, tol, step) { }
+   FletcherReevesCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { }
};
+
template
class PolakRibiereCG : public internals::conjugate_gradient_impl {
    using Base = internals::conjugate_gradient_impl;
  public:
    PolakRibiereCG() = default;
-   PolakRibiereCG(int max_iter, double tol, double step) : Base(max_iter, tol, step) { }
+   PolakRibiereCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { }
};
+
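+// The "Pls" variant clips the Polak-Ribiere coefficient at zero,
+//   beta_plus = max(beta_PR, 0),
+// which amounts to an automatic restart along the steepest-descent direction
+// whenever beta_PR turns negative (see polak_ribiere_plus_update above).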
+template
+class PolakRibierePlsCG : public internals::conjugate_gradient_impl {
+   using Base = internals::conjugate_gradient_impl;
+ public:
+   PolakRibierePlsCG() = default;
+   PolakRibierePlsCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { }
+};
+
}   // namespace fdapde

#endif   // __FDAPDE_CONJUGATE_GRADIENT_H__
diff --git a/fdaPDE/src/optimization/genetic_mutation_ops.h b/fdaPDE/src/optimization/genetic_mutation_ops.h
index 4124acfa..e2e2d16b 100644
--- a/fdaPDE/src/optimization/genetic_mutation_ops.h
+++ b/fdaPDE/src/optimization/genetic_mutation_ops.h
@@ -43,12 +43,7 @@ class GaussianMutation {
    }

    template bool mutate_hook(Opt& opt) {
-       // Perform binary tournament selection
-       for(int j = 0; j < opt.population.cols(); ++j) {
-           for(int i = 0; i < opt.population.rows(); ++i)
-               opt.population(i,j) += normal_dist_(opt.rng) * variance_;
-       }
-
+       opt.population += internals::gaussian_matrix(opt.population.rows(), opt.population.cols(), variance_, opt.seed_);
        variance_ *= multiplier_;
        return false;
    }

class CrossoverMutation {
  private:
    std::uniform_real_distribution distribution_{0.0,1.0};
+   std::uniform_int_distribution indices_distribution_ {0,1};
    double mutation_probability_ = 0.3;

  public:
    CrossoverMutation(double mutation_probability = 0.3) : mutation_probability_(mutation_probability) {}

    template bool sync_hook(Opt& opt) {
+       indices_distribution_ = std::uniform_int_distribution(0, opt.population.cols()-1);
        return false;
    }

    template bool mutate_hook(Opt& opt) {
-       for(int j = 0; j < opt.population.cols() - 1; j += 2)
-           for(int i = 0; i < opt.population.rows(); ++i) {
-               if( distribution_(opt.rng) < mutation_probability_) {
-                   double coeff = (opt.population(i, j) + opt.population(i, j+1))/2.0;
-                   opt.population(i, j) = coeff;
-                   opt.population(i, j+1) = coeff;
+       for(int j = 0; j < opt.population.cols() - 1; j += 2) {
+           // Pick a random partner column to pair with column j
+           int partner_idx = indices_distribution_(opt.rng);
+
+           for(int i = 0; i < opt.population.rows(); ++i) {
+               // With a certain probability, replace gene (i, j) with the average
+               // of the two parents' genes
+               if( distribution_(opt.rng) < mutation_probability_) {
+                   double coeff = (opt.population(i, j) + opt.population(i, partner_idx))/2.0;
+                   opt.population(i, j) = coeff;
+               }
            }
        }
        return false;
    }
diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h
index c1421679..9dc4aa53 100644
--- a/fdaPDE/src/optimization/genetic_optim.h
+++ b/fdaPDE/src/optimization/genetic_optim.h
@@ -32,10 +32,9 @@ template class GeneticOptim {
    int n_iter_ = 0;                 // current iteration number
    double tol_ = 1e-2;              // tolerance
    int population_size_ = 100;      // The size of any given generation
-   unsigned seed_ = 0;              // Seed for the RNG
    int static_since_ = 0;           // number of iterations that passed since last optimum value change
    int no_improvement_limit_ = 0;
-
+
  public:
    static constexpr bool gradient_free = true;
    static constexpr int static_input_size = N;
    matrix_t population;
    vector_t population_fitness;
    std::mt19937 rng;
-   vector_t x_curr;
-   double obj_curr;
+   vector_t x_curr;
+   double obj_curr;
+   unsigned seed_ = 0;

  public:
    // constructors

        // Compute the fitness for selection
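+       // Exposing every objective evaluation through the eval hooks lets
+       // callbacks (e.g. an evaluation budget) request a stop as soon as the
+       // budget is exhausted, instead of waiting for the generation to end.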
        for(int i = 0; i < population_size_; ++i) {
            population_fitness(i) = objective(population.col(i));
+           stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
        }
diff --git a/fdaPDE/src/optimization/lbfgs.h b/fdaPDE/src/optimization/lbfgs.h
index 433e6f23..18986a34 100644
--- a/fdaPDE/src/optimization/lbfgs.h
+++ b/fdaPDE/src/optimization/lbfgs.h
@@ -169,6 +169,7 @@ template class LBFGS {
        }
        double error = 0;
        auto grad = objective.gradient();
+       values_.clear();
        n_iter_ = 0;
@@ -198,7 +199,7 @@ template class LBFGS {
            // Stationarity test
            grad_new = grad(x_new);
            if (grad_new.isApprox(zero)) {
-               optimum_ = x_old;
+               optimum_ = x_new;
                value_ = objective(optimum_);
                return optimum_;
            }
@@ -217,7 +218,7 @@ template class LBFGS {
            stop |= (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective));
            error = grad_new.norm();
-           values_.push_back(objective(x_old));
+           values_.push_back(objective(x_new));
            x_old = x_new;
            grad_old = grad_new;
            ++n_iter_;
diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h
index 102b9ee1..fc67c0a7 100644
--- a/fdaPDE/src/optimization/nelder_mead.h
+++ b/fdaPDE/src/optimization/nelder_mead.h
@@ -113,8 +113,10 @@ template class NelderMead {
        // Compute the vertices' values
-       for(int i = 0; i < simplex_.cols(); ++i)
+       for(int i = 0; i < simplex_.cols(); ++i) {
            vertices_values_[i] = objective(simplex_.col(i));
+           stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
+       }

        // Sort the vertices according to their objective value
        std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) {
            return vertices_values_[a] < vertices_values_[b];
        });
@@ -187,6 +189,7 @@ template class NelderMead {
                for(int i = 1; i < dimension + 1; ++i) {
                    simplex_.col(vertices_rank_[i]) = best_vertex + delta_ * (simplex_.col(vertices_rank_[i]) - best_vertex);
                    vertices_values_[vertices_rank_[i]] = objective(simplex_.col(vertices_rank_[i]));
+                   stop |= internals::exec_eval_hooks(*this, objective, callbacks_);
                }
            }
diff --git a/fdaPDE/src/optimization/wolfe.h b/fdaPDE/src/optimization/wolfe.h
index c06c7cf3..28251c1f 100644
--- a/fdaPDE/src/optimization/wolfe.h
+++ b/fdaPDE/src/optimization/wolfe.h
@@ -25,14 +25,16 @@ namespace fdapde {

// wolfe line search method for adaptive step size
class WolfeLineSearch {
  private:
-   static constexpr int max_iter_ = 10;
+   int max_iter_ = 100;
    double alpha_ = 1.0;
    double alpha_max_ = std::numeric_limits::infinity(), alpha_min_ = 0;
    double c1_ = 1e-4, c2_ = 0.9;
  public:
    WolfeLineSearch() = default;
-   WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
-
+   WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { }
+   WolfeLineSearch(double alpha, double c1, double c2, int max_iter) : max_iter_(max_iter), alpha_(alpha), c1_(c1), c2_(c2) { }
+   WolfeLineSearch(int max_iter) : max_iter_(max_iter) { }
+
    // bisection method for the weak Wolfe conditions. check "Jorge Nocedal, Stephen J. Wright (2006), Numerical
    // Optimization, page 58"
    template bool adapt_hook(Opt& opt, Obj& obj) {