diff --git a/fdaPDE/optimization.h b/fdaPDE/optimization.h index 46a1cfbe..d4ae09fe 100644 --- a/fdaPDE/optimization.h +++ b/fdaPDE/optimization.h @@ -61,6 +61,8 @@ template static constexpr bool is_gradient_free_opt_v = is_gradie #include "src/optimization/callbacks.h" #include "src/optimization/backtracking.h" #include "src/optimization/wolfe.h" +#include "src/optimization/genetic_selection_ops.h" +#include "src/optimization/genetic_mutation_ops.h" // algorithms #include "src/optimization/grid_search.h" @@ -70,6 +72,7 @@ template static constexpr bool is_gradient_free_opt_v = is_gradie #include "src/optimization/bfgs.h" #include "src/optimization/lbfgs.h" #include "src/optimization/nelder_mead.h" +#include "src/optimization/genetic_optim.h" // clang-format on diff --git a/fdaPDE/src/linear_algebra/rp_chol.h b/fdaPDE/src/linear_algebra/rp_chol.h index 0b3bb8f3..6aca643a 100644 --- a/fdaPDE/src/linear_algebra/rp_chol.h +++ b/fdaPDE/src/linear_algebra/rp_chol.h @@ -65,9 +65,6 @@ class RpChol { } pivot_set_.merge(pivot_set); std::vector pivot_vec(pivot_set.begin(), pivot_set.end()); - - for(int kk : pivot_vec) std::cout << kk << " "; - std::cout << std::endl; // evaluate columns at pivot_set, remove overlap with previously choosen columns matrix_t G = A(Eigen::all, pivot_vec) - L_.leftCols(i) * L_(pivot_vec, Eigen::all).leftCols(i).transpose(); diff --git a/fdaPDE/src/optimization/bfgs.h b/fdaPDE/src/optimization/bfgs.h index 141bc419..4bd769cf 100644 --- a/fdaPDE/src/optimization/bfgs.h +++ b/fdaPDE/src/optimization/bfgs.h @@ -70,9 +70,10 @@ template class BFGS { inv_hessian_ = matrix_t::Identity(size, size); } else { inv_hessian_ = matrix_t::Identity(); - } - update = -inv_hessian_ * grad_old; - stop |= internals::exec_grad_hooks(*this, objective, callbacks_); + } + values_.clear(); + update = -inv_hessian_ * grad_old; + stop |= internals::exec_grad_hooks(*this, objective, callbacks_); error = grad_old.norm(); values_.push_back(objective(x_old)); diff --git a/fdaPDE/src/optimization/callbacks.h b/fdaPDE/src/optimization/callbacks.h index 5dd2d4dd..c09ad997 100644 --- a/fdaPDE/src/optimization/callbacks.h +++ b/fdaPDE/src/optimization/callbacks.h @@ -81,6 +81,48 @@ template bool exec_stop_if(Opt& optimizer, Obj& obj return b; } +template +bool exec_sync_hooks(Opt& optimizer, std::tuple& callbacks) { + return opt_hooks_loop( + [&](auto&& callback) { + if constexpr (requires(std::decay_t c, Opt opt) { + { c.sync_hook(opt) } -> std::same_as; + }) { + return callback.sync_hook(optimizer); + } + return false; + }, + callbacks); +} + +template +bool exec_mutate_hooks(Opt& optimizer, std::tuple& callbacks) { + return opt_hooks_loop( + [&](auto&& callback) { + if constexpr (requires(std::decay_t c, Opt opt) { + { c.mutate_hook(opt) } -> std::same_as; + }) { + return callback.mutate_hook(optimizer); + } + return false; + }, + callbacks); +} + +template +bool exec_select_hooks(Opt& optimizer, std::tuple& callbacks) { + return opt_hooks_loop( + [&](auto&& callback) { + if constexpr (requires(std::decay_t c, Opt opt) { + { c.select_hook(opt) } -> std::same_as; + }) { + return callback.select_hook(optimizer); + } + return false; + }, + callbacks); +} + } // namespace internals } // namespace fdapde diff --git a/fdaPDE/src/optimization/conjugate_gradient.h b/fdaPDE/src/optimization/conjugate_gradient.h index 10eeadd5..9fcf3606 100644 --- a/fdaPDE/src/optimization/conjugate_gradient.h +++ b/fdaPDE/src/optimization/conjugate_gradient.h @@ -36,20 +36,22 @@ template class conjugate_gradient_impl { int 
max_iter_; // maximum number of iterations before forced stop double tol_; // tolerance on error before forced stop double step_; // update step + bool restart_; public: static constexpr bool gradient_free = false; static constexpr int static_input_size = N; vector_t x_old, x_new, update, grad_old, grad_new; double h; // constructor - conjugate_gradient_impl() : max_iter_(500), tol_(1e-5), step_(1e-2) { } - conjugate_gradient_impl(int max_iter, double tol, double step) : max_iter_(max_iter), tol_(tol), step_(step) { } + conjugate_gradient_impl() : max_iter_(500), tol_(1e-5), step_(1e-2), restart_(true) { } + conjugate_gradient_impl(int max_iter, double tol, double step, bool restart) : max_iter_(max_iter), tol_(tol), step_(step), restart_(restart) { } conjugate_gradient_impl(const conjugate_gradient_impl& other) : - max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_) { } + max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_), restart_(other.restart_) { } conjugate_gradient_impl& operator=(const conjugate_gradient_impl& other) { max_iter_ = other.max_iter_; tol_ = other.tol_; step_ = other.step_; + restart_ = other.restart_; return *this; } template @@ -66,6 +68,7 @@ template class conjugate_gradient_impl { auto grad = objective.gradient(); h = step_; n_iter_ = 0; + value_ = std::numeric_limits::max(); x_old = x0, x_new = vector_t::Constant(size, NaN); grad_old = grad(x_old), grad_new = vector_t::Constant(size, NaN); update = -grad_old; @@ -75,21 +78,36 @@ template class conjugate_gradient_impl { while (n_iter_ < max_iter_ && error > tol_ && !stop) { stop |= internals::exec_adapt_hooks(*this, objective, callbacks_); + // update along descent direction x_new = x_old + h * update; grad_new = grad(x_new); // prepare next iteration - update = -grad_new + std::max(0.0, beta(*this)) * update; // update conjugate direction + if(restart_ && std::abs(grad_new.dot(grad_old)) / grad_new.dot(grad_new) >= 0.1) { + update = -grad_new; + } else { + update = -grad_new + std::max(0.0, beta(*this)) * update; // update conjugate direction + } + stop |= - (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + x_old = x_new; grad_old = grad_new; error = grad_new.norm(); values_.push_back(objective(x_old)); n_iter_++; + + if(values_.back() <= value_) { + value_ = values_.back(); + optimum_ = x_old; + } + } + + if(values_.back() <= value_) { + value_ = values_.back(); + optimum_ = x_old; } - optimum_ = x_old; - value_ = values_.back(); return optimum_; } // observers @@ -100,11 +118,18 @@ template class conjugate_gradient_impl { }; struct fletcher_reeves_update { - template double operator()(const Opt& opt) { return opt.grad_new.norm() / opt.grad_old.norm(); } + template double operator()(const Opt& opt) { return opt.grad_new.dot(opt.grad_new) / opt.grad_old.dot(opt.grad_old); } }; + struct polak_ribiere_update { template double operator()(const Opt& opt) { - return opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.norm(); + return opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.dot(opt.grad_old); + } +}; + +struct polak_ribiere_plus_update { + template double operator()(const Opt& opt) { + return std::max(opt.grad_new.dot(opt.grad_new - opt.grad_old) / opt.grad_old.dot(opt.grad_old), 0.0); + } }; @@ -116,16 +141,26 @@ class FletcherReevesCG : public internals::conjugate_gradient_impl; public: FletcherReevesCG() =
default; - FletcherReevesCG(int max_iter, double tol, double step) : Base(max_iter, tol, step) { } + FletcherReevesCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { } }; + template class PolakRibiereCG : public internals::conjugate_gradient_impl { using Base = internals::conjugate_gradient_impl; public: PolakRibiereCG() = default; - PolakRibiereCG(int max_iter, double tol, double step) : Base(max_iter, tol, step) { } + PolakRibiereCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { } +}; + +template +class PolakRibierePlsCG : public internals::conjugate_gradient_impl { + using Base = internals::conjugate_gradient_impl; + public: + PolakRibierePlsCG() = default; + PolakRibierePlsCG(int max_iter, double tol, double step, bool restart) : Base(max_iter, tol, step, restart) { } }; + } // namespace fdapde #endif // __FDAPDE_CONJUGATE_GRADIENT_H__ diff --git a/fdaPDE/src/optimization/genetic_mutation_ops.h b/fdaPDE/src/optimization/genetic_mutation_ops.h new file mode 100644 index 00000000..e2e2d16b --- /dev/null +++ b/fdaPDE/src/optimization/genetic_mutation_ops.h @@ -0,0 +1,86 @@ + +// This file is part of fdaPDE, a C++ library for physics-informed +// spatial and functional data analysis. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
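Note on the conjugate gradient changes above: a minimal usage sketch of the extended front-ends (FletcherReevesCG, PolakRibiereCG, PolakRibierePlsCG). The toy objective, the include path and the single dimension template parameter are assumptions for illustration; the only interface requirements visible in the code are an operator()(vector) returning double and a gradient() member returning a callable.

#include <Eigen/Dense>
#include <fdaPDE/optimization.h>   // assumed include path

// toy objective f(x) = x0^2 + 4*x1^2 with gradient (2*x0, 8*x1) -- illustrative only
struct quadratic_objective {
    double operator()(const Eigen::Matrix<double, 2, 1>& x) const { return x[0] * x[0] + 4.0 * x[1] * x[1]; }
    auto gradient() const {
        return [](const Eigen::Matrix<double, 2, 1>& x) {
            return Eigen::Matrix<double, 2, 1>(2.0 * x[0], 8.0 * x[1]);
        };
    }
};

int main() {
    // max_iter, tol, step, restart: the new fourth argument toggles the restart test on successive gradients
    fdapde::PolakRibierePlsCG<2> cg(1000, 1e-6, 1e-2, true);
    Eigen::Matrix<double, 2, 1> x0(1.0, 1.0);
    cg.optimize(quadratic_objective {}, x0);
    // cg.optimum(), cg.value() and cg.n_iter() report the result of the last run
    return 0;
}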
+ +#ifndef __FDAPDE_GENETIC_MUTATION_OPS_H__ +#define __FDAPDE_GENETIC_MUTATION_OPS_H__ + +#include "header_check.h" + +namespace fdapde { + +class GaussianMutation { +private: + double initial_variance_ = 2.5; + double variance_ = 2.5; + double multiplier_ = 0.95; + std::normal_distribution normal_dist_{0.0,1.0}; + +public: + // constructors + GaussianMutation(double initial_variance = 2.5, double multiplier = 0.95): + initial_variance_(initial_variance), + variance_(initial_variance), + multiplier_(multiplier) + {} + + template bool sync_hook(Opt& opt) { + variance_ = initial_variance_ * opt.population.rowwise().norm().mean(); + return false; + } + + template bool mutate_hook(Opt& opt) { + opt.population += internals::gaussian_matrix(opt.population.rows(), opt.population.cols(), variance_, opt.seed_); + variance_ *= multiplier_; + return false; + } +}; + +class CrossoverMutation { +private: + std::uniform_real_distribution distribution_{0.0,1.0}; + std::uniform_int_distribution indices_distribution_ {0,1}; + double mutation_probability_ = 0.3; + +public: + // constructors + CrossoverMutation(double mutation_probability = 0.3) : mutation_probability_(mutation_probability) {} + + template bool sync_hook(Opt& opt) { + indices_distribution_ = std::uniform_int_distribution(0, opt.population.cols()-1); + return false; + } + + template bool mutate_hook(Opt& opt) { + for(int j = 0; j < opt.population.cols() - 1; j += 2) { + // Find a random pair of "parents" + int partner_idx = indices_distribution_(opt.rng); + + for(int i = 0; i < opt.population.rows(); ++i) { + // With a certain probability, swap + if( distribution_(opt.rng) < mutation_probability_) { + double coeff = (opt.population(i, j) + opt.population(i, partner_idx))/2.0; + opt.population(i, j) = coeff; + } + } + } + return false; + } +}; + +} // namespace fdapde + +#endif // __FDAPDE_GENETIC_MUTATION_OPS_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/genetic_optim.h b/fdaPDE/src/optimization/genetic_optim.h new file mode 100644 index 00000000..9dc4aa53 --- /dev/null +++ b/fdaPDE/src/optimization/genetic_optim.h @@ -0,0 +1,184 @@ +// This file is part of fdaPDE, a C++ library for physics-informed +// spatial and functional data analysis. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
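Note on the mutation operators above: they interact with the optimizer only through the hook detection added in callbacks.h, so exec_sync_hooks and exec_mutate_hooks simply skip operators that do not provide a matching bool sync_hook(Opt&) or bool mutate_hook(Opt&). As an illustration of that contract, a hypothetical uniform-noise mutation (not part of this changeset) could look like:

#include <random>

class UniformMutation {
   private:
    double radius_ = 1.0;
    std::uniform_real_distribution<double> noise_ {-1.0, 1.0};
   public:
    explicit UniformMutation(double radius = 1.0) : radius_(radius) { }
    // no sync_hook is defined: the requires-clause in exec_sync_hooks skips this operator
    template <typename Opt> bool mutate_hook(Opt& opt) {
        // perturb every coordinate of every individual; population and rng are the public members
        // exposed by the GeneticOptim class introduced below
        for (int j = 0; j < opt.population.cols(); ++j) {
            for (int i = 0; i < opt.population.rows(); ++i) { opt.population(i, j) += radius_ * noise_(opt.rng); }
        }
        return false;   // returning true requests a forced stop
    }
};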
+ +#ifndef __FDAPDE_GENETIC_OPTIM_H__ +#define __FDAPDE_GENETIC_OPTIM_H__ + +#include "header_check.h" + +namespace fdapde { + +template class GeneticOptim { +private: + using vector_t = std::conditional_t, Eigen::Matrix>; + using matrix_t = std::conditional_t, Eigen::Matrix>; + + vector_t optimum_; + double value_ = std::numeric_limits::max(); // objective value at optimum + int max_iter_ = 500; // maximum number of iterations before forced stop + int n_iter_ = 0; // current iteration number + double tol_ = 1e-2; // tolerance + int population_size_ = 100; // The size of any given generation + int static_since_ = 0; // number of iterations that passed since last optimum value change + int no_improvement_limit_ = 0; + +public: + static constexpr bool gradient_free = true; + static constexpr int static_input_size = N; + + matrix_t population; + vector_t population_fitness; + std::mt19937 rng; + + vector_t x_curr; + double obj_curr; + unsigned seed_ = 0; + +public: + // constructors + GeneticOptim() = default; + + GeneticOptim( + int max_iter, + double tol, + int no_improvement_limit, + int population_size, + unsigned seed = 0) : + seed_(seed), + no_improvement_limit_(no_improvement_limit), + max_iter_(max_iter), + tol_(tol), + population_size_(population_size), + rng(seed) { + fdapde_assert(population_size_ > 0); + } + + // copy semantic + GeneticOptim(const GeneticOptim& other) : + population_size_(other.population_size_), + tol_(other.tol_), + no_improvement_limit_(other.no_improvement_limit_), + population_fitness(other.population_fitness), + max_iter_(other.max_iter_), + seed_(other.seed_), + population(other.population){ + } + + GeneticOptim& operator=(const GeneticOptim& other) { + max_iter_ = other.max_iter_; + tol_ = other.tol_; + no_improvement_limit_ = other.no_improvement_limit_; + population_fitness = other.population_fitness; + population_size_ = other.population_size_; + seed_ = other.seed_; + population = other.population; + return *this; + } + + template + vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) { + fdapde_static_assert( + std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, + INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE + ); + + std::tuple callbacks_ {callbacks...}; + + // whether or not the callbacks's stoping criteria are met + bool stop = false; + value_ = std::numeric_limits::max(); + n_iter_ = 0; + static_since_ = 0; + population = x0.rowwise().replicate(population_size_); + population_fitness = vector_t::Zero(population_size_, 1); + fdapde_assert(population_size_ == population.cols()); + fdapde_assert(population_size_ == population_fitness.rows()); + + // Reset the state of all callbacks and sync their state with the + // parameters in GeneticOptim + stop |= internals::exec_sync_hooks(*this, callbacks_); + + // In the case of this algorithm, post_update... 
is the mutation process + // so we apply it before the main loop + stop |= internals::exec_mutate_hooks(*this, callbacks_); + + // Compute the fitness for selection + for(int i = 0; i < population_size_; ++i) { + population_fitness(i) = objective(population.col(i)); + stop |= internals::exec_eval_hooks(*this, objective, callbacks_); + } + + while (n_iter_ < max_iter_ && !stop) { + stop |= internals::exec_select_hooks(*this, callbacks_); + stop |= internals::exec_mutate_hooks(*this, callbacks_); + + // Compute the fitness for selection + for(int i = 0; i < population_size_; ++i) { + population_fitness(i) = objective(population.col(i)); + stop |= internals::exec_eval_hooks(*this, objective, callbacks_); + } + + // Compute argmax of the population fitness + int current_best = 0; + for(int i = 1; i < population_size_; ++i) { + if(population_fitness(i) < population_fitness(current_best)) + current_best = i; + } + + if( std::abs(value_ - population_fitness(current_best)) < tol_) { + ++static_since_; + } else { + static_since_ = 0; + } + + // Stoping condition: haven't changed for a while + stop |= internals::exec_stop_if(*this, objective); + stop |= static_since_ > no_improvement_limit_; + + // Update + ++n_iter_; + value_ = population_fitness(current_best); + optimum_ = population.col(current_best); + } + + return optimum_; + } + + /** + * @brief Returns the argmin of the last call to [optimize] + * + * @return vector_t + */ + vector_t optimum() const { return optimum_; } + + /** + * @brief Returns the minimum of the last call to [optimize] + * + * @return double + */ + double value() const { return value_; } + + /** + * @brief Returns the number of iterations performed by the method on the last call to [optimize] + * + * @return int + */ + int n_iter() const { return n_iter_; } +}; + +} // namespace fdapde + +#endif // __FDAPDE_GENETIC_OPTIM_H__ diff --git a/fdaPDE/src/optimization/genetic_selection_ops.h b/fdaPDE/src/optimization/genetic_selection_ops.h new file mode 100644 index 00000000..60debc10 --- /dev/null +++ b/fdaPDE/src/optimization/genetic_selection_ops.h @@ -0,0 +1,150 @@ + +// This file is part of fdaPDE, a C++ library for physics-informed +// spatial and functional data analysis. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
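Note on GeneticOptim above: a hedged end-to-end sketch combining it with GaussianMutation and with RankSelection from the file that follows. The Rosenbrock objective, the include path, and the assumption that GeneticOptim takes the problem dimension as its only template parameter (like the other optimizers in this module) are illustrative.

#include <Eigen/Dense>
#include <cmath>
#include <fdaPDE/optimization.h>   // assumed include path

int main() {
    // 2D Rosenbrock function, minimum at (1, 1) -- illustrative objective
    auto rosenbrock = [](const Eigen::VectorXd& x) -> double {
        return 100.0 * std::pow(x[1] - x[0] * x[0], 2) + std::pow(1.0 - x[0], 2);
    };
    // max_iter, tol, no_improvement_limit, population_size, seed
    fdapde::GeneticOptim<fdapde::Dynamic> ga(500, 1e-4, 25, 64, 42);
    Eigen::VectorXd x0(2);
    x0 << -1.2, 1.0;
    ga.optimize(rosenbrock, x0, fdapde::RankSelection {}, fdapde::GaussianMutation {});
    // ga.optimum() is the best individual of the last generation, ga.value() its fitness
    return 0;
}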
+ +#ifndef __FDAPDE_GENETIC_SELECTION_OPS_H__ +#define __FDAPDE_GENETIC_SELECTION_OPS_H__ + +#include "header_check.h" + +namespace fdapde { + +class RankSelection { +private: + using vector_t = Eigen::Matrix; + using matrix_t = Eigen::Matrix; + + std::vector population_order_; + std::uniform_real_distribution<> distribution_{0.0, 1.0}; + std::vector cdf_; + matrix_t new_population_; + + /** + * @brief Given a random number between 0 and 1, returns the index of selected rank with the probability of rank i \pi_i = (amax - (amax-amin)*(i-1)/(m-1))/m + * + * @param sample random uniform sample between 0 and 1 + * @return int the rank associated with the sample + */ + int index_of_sample(double sample) const { + int upper_bound = cdf_.size() - 1; + int lower_bound = 0; + + // Handle edge cases + if (sample <= cdf_[0]) { + return 0; + } + if (sample > cdf_[upper_bound]) { + return upper_bound; + } + + // Binary search to find the appropriate interval + while (lower_bound < upper_bound) { + int middle = lower_bound + (upper_bound - lower_bound) / 2; + if (cdf_[middle] < sample) { + lower_bound = middle + 1; + } else { + upper_bound = middle; + } + } + + return upper_bound; + } + +public: + // constructors + RankSelection() = default; + + template bool sync_hook(Opt& opt) { + population_order_.clear(); + cdf_.clear(); + + population_order_.reserve(opt.population.cols()); + new_population_ = opt.population; + cdf_.reserve(opt.population.cols()); + + const double denom = (opt.population.cols()+1)*(opt.population.cols()) * 0.5; // N(N+1)/2 + double probability_sum = 0.0; + for(int i = 0; i < opt.population.cols(); ++i) { + double pi_i = (opt.population.cols() - i) / denom; + probability_sum += pi_i; + + cdf_.push_back(probability_sum); + population_order_.push_back(i); + } + return false; + } + + template bool select_hook(Opt& opt) { + std::sort( + population_order_.begin(), + population_order_.end(), + [&](int a, int b) { + return opt.population_fitness(a) < opt.population_fitness(b); + } + ); + + for(int i = 0; i < opt.population.cols(); ++i) { + int selected = index_of_sample(distribution_(opt.rng)); + new_population_.col(i) = (opt.population.col(population_order_[selected])); + } + opt.population = new_population_; + + return false; + } +}; + +class BinaryTournamentSelection { +private: + std::uniform_int_distribution distribution_; + +public: + // constructors + BinaryTournamentSelection(int population_size = 10): + distribution_(0,population_size-1) + {} + + template bool sync_hook(Opt& opt) { + fdapde_assert(opt.population.size() >= 2); + distribution_ = std::uniform_int_distribution{0,static_cast(opt.population.cols())-1}; + return false; + } + + template bool select_hook(Opt& opt) { + fdapde_assert(distribution_.max() == opt.population.cols() - 1); + fdapde_assert(distribution_.min() == 0); + + // Perform binary tournament selection + for(int i = 0; i < opt.population.cols(); i += 1) { + int id_first = distribution_(opt.rng); + int id_second = distribution_(opt.rng); + if(id_first == id_second) + continue; + + double fit_first = opt.population_fitness(id_first); + double fit_second = opt.population_fitness(id_second); + if(fit_first <= fit_second) { + opt.population.col(id_second) = opt.population.col(id_first); + } else { + opt.population.col(id_first) = opt.population.col(id_second); + } + } + return false; + } +}; + +} // namespace fdapde + +#endif // __FDAPDE_GENETIC_SELECTION_OPS_H__ \ No newline at end of file diff --git a/fdaPDE/src/optimization/lbfgs.h 
b/fdaPDE/src/optimization/lbfgs.h index 0e596afe..18986a34 100644 --- a/fdaPDE/src/optimization/lbfgs.h +++ b/fdaPDE/src/optimization/lbfgs.h @@ -21,109 +21,234 @@ namespace fdapde { + +/** + * @brief Implementation of Broyden–Fletcher–Goldfarb–Shanno algorithm for unconstrained nonlinear optimization + * + * @tparam N dimension of the problem + * @tparam Args list of callbacks that will be called at different points of the algorithm. + */ template class LBFGS { - private: +private: using vector_t = std::conditional_t, Eigen::Matrix>; - using matrix_t = - std::conditional_t, Eigen::Matrix>; + using matrix_t = std::conditional_t, Eigen::Matrix>; vector_t optimum_; - double value_; // objective value at optimum - int n_iter_ = 0; // current iteration number - std::vector values_; // explored objective values during optimization - - int max_iter_; // maximum number of iterations before forced stop - double tol_; // tolerance on error before forced stop - double step_; // update step - int mem_size_ = 10; // number of vector used for approximating the objective hessian - Eigen::Matrix grad_mem_, x_mem_; - public: + double value_; // objective value at optimum + int n_iter_ = 0; // current iteration number + std::vector values_; // explored objective values during optimization + + int max_iter_; // maximum number of iterations before forced stop + double tol_; // tolerance on error before forced stop + double step_; // update step + int mem_size_ = 10; // number of vector used for the hessian approx + + Eigen::Matrix delta_grad_memory_, delta_x_memory_; + +public: static constexpr bool gradient_free = false; static constexpr int static_input_size = N; vector_t x_old, x_new, update, grad_old, grad_new; double h; + +private: + vector_t compute_hess_inv_grad_mul_(const vector_t &grad, double initial_coeff) { + vector_t q = grad; + + // If the iteration count is too low, then we cannot find [mem_size_] vectors, + // so we instead use the last [n_iter_] to compute the approximation of the hessian. + int current_memory = n_iter_ < mem_size_ ? n_iter_: mem_size_; + + Eigen::VectorXd alphas (current_memory); + alphas.setZero(); + + for(int i = 0; std::cmp_less(i, current_memory); ++i) { + int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; + vector_t delta_x = delta_x_memory_.col(k); + vector_t delta_grad = delta_grad_memory_.col(k); + + alphas(i) = delta_x.dot(q) / delta_grad.dot(delta_x); + q -= alphas(i) * delta_grad; + } + + // H_0^k (Our initial guess as the inverse hessian at x_old) is chosen to be + // an identity matrix times a constant anyways so we don't have to store any matrix + vector_t r = initial_coeff /* \times Identity \times */ * q; + + for(int i = current_memory - 1; i >= 0; --i) { + int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; + vector_t delta_x = delta_x_memory_.col(k); + vector_t delta_grad = delta_grad_memory_.col(k); + auto beta = delta_grad.dot(r) / delta_grad.dot(delta_x); + r += delta_x*(alphas(i) - beta); + } + + return r; + } + +public: // constructors - LBFGS() : max_iter_(500), tol_(1e-5), step_(1e-2) { } - LBFGS(int max_iter, double tol, double step, int mem_size) : - max_iter_(max_iter), tol_(tol), step_(step), mem_size_(mem_size) { + LBFGS(): + max_iter_(500), tol_(1e-5), step_(1e-2), mem_size_(5) { + } + + /** + * @brief Construct a new LBFGS instance + * + * @param max_iter maximum number of iterations performed + * @param memory_size size of the memory used to approximate the inverse hessian matrix in the LBFGH alg. 
+ * @param tol tolerance used as the stoping criterion (|\nabla f_k|_2 < tol) + * @param step default step used, may be overwritten by the line search method + */ + LBFGS(int max_iter, double tol, double step, int memory_size) : + max_iter_(max_iter), + mem_size_(memory_size), + tol_(tol), step_(step){ fdapde_assert(mem_size_ >= 0); } + + // copy semantic LBFGS(const LBFGS& other) : - max_iter_(other.max_iter_), tol_(other.tol_), step_(other.step_), mem_size_(other.mem_size_) { } + mem_size_(other.mem_size_), + max_iter_(other.max_iter_), + n_iter_(other.n_iter_), + tol_(other.tol_), + delta_grad_memory_(other.delta_grad_memory_), + delta_x_memory_(other.delta_x_memory_), + value_(other.value_), + step_(other.step_), + optimum_(other.optimum_), + values_(other.values_) { + } + LBFGS& operator=(const LBFGS& other) { + mem_size_ = other.mem_size_; max_iter_ = other.max_iter_; + n_iter_ = other.n_iter_; tol_ = other.tol_; + delta_grad_memory_ = other.delta_grad_memory_; + delta_x_memory_ = other.delta_x_memory_; + value_ = other.value_; step_ = other.step_; - mem_size_ = other.mem_size_; + optimum_ = other.optimum_; + values_ = other.values_; return *this; } + + /** + * @brief Minimizes a function using the L-BFGS method + * + * @tparam ObjectiveT Type of the function to be minimized + * @tparam Functor Types of the callbacks + * @param objective function to be minimized + * @param x0 initial point + * @param func + */ template - vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) { + vector_t optimize( + ObjectiveT&& objective, + const vector_t& x0, + Callbacks&&... callbacks + ) { fdapde_static_assert( std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_CALLABLE_AT_VECTOR_TYPE); - constexpr double NaN = std::numeric_limits::quiet_NaN(); + + // whether or not the callbacks's stoping criteria are met + bool stop = false; + + // Callback management std::tuple callbacks_ {callbacks...}; - bool stop = false; // asserted true in case of forced stop - double error = std::numeric_limits::max(); - double gamma = 1.0; - int size = N == Dynamic ? 
x0.rows() : N; + + // Initialize the state + vector_t zero; + if constexpr (N == Dynamic) { + zero = vector_t::Zero(x0.rows()); + } else { + zero = vector_t::Zero(); + } + double error = 0; auto grad = objective.gradient(); - h = step_; + values_.clear(); n_iter_ = 0; - x_old = x0, x_new = vector_t::Constant(size, NaN); - grad_old = grad(x_old), grad_new = vector_t::Constant(size, NaN); - update = -grad_old; + update = zero; + h = step_; + x_old = x0, x_new = x0; + delta_x_memory_.resize(x0.rows(), mem_size_); + delta_grad_memory_.resize(x0.rows(), mem_size_); + + // Stationarity test + grad_old = grad(x_old); stop |= internals::exec_grad_hooks(*this, objective, callbacks_); + if (grad_old.isApprox(zero)) { + optimum_ = x_old; + value_ = objective(optimum_); + return optimum_; + } error = grad_old.norm(); - values_.push_back(objective(x_old)); - x_mem_.resize(x0.rows(), mem_size_); - grad_mem_.resize(x0.rows(), mem_size_); - + + // state to calculate the (inverse Hessian * grad) approx + double initial_coeff = 1.0; + while (n_iter_ < max_iter_ && error > tol_ && !stop) { + // Update along descent direction + update = -compute_hess_inv_grad_mul_(grad_old, initial_coeff); stop |= internals::exec_adapt_hooks(*this, objective, callbacks_); - // update along descent direction x_new = x_old + h * update; + + // Stationarity test grad_new = grad(x_new); - // prepare next iteration - // update inverse Hessian approximation - int col_idx = n_iter_ % mem_size_; - grad_mem_.col(col_idx) = grad_new - grad_old; - x_mem_.col(col_idx) = x_new - x_old; - gamma = x_mem_.col(col_idx).dot(grad_mem_.col(col_idx)) / grad_mem_.col(col_idx).norm(); - // compute update direction - vector_t q = grad_new; - n_iter_++; - int current_mem = n_iter_ < mem_size_ ? n_iter_: mem_size_; - std::vector alpha(current_mem, 0); - for (int i = 0; std::cmp_less(i, current_mem); ++i) { - int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; - alpha[i] = x_mem_.col(k).dot(q) / grad_mem_.col(k).dot(x_mem_.col(k)); - q -= alpha[i] * grad_mem_.col(k); - } - // H_0^k = I (initial guess of the inverse hessian) - update = -gamma * q; - for (int i = current_mem - 1; i >= 0; --i) { - int k = (n_iter_ + mem_size_ - i - 1) % mem_size_; - double beta = grad_mem_.col(k).dot(update) / grad_mem_.col(k).dot(x_mem_.col(k)); - update -= x_mem_.col(k) * (alpha[i] + beta); + if (grad_new.isApprox(zero)) { + optimum_ = x_new; + value_ = objective(optimum_); + return optimum_; } - stop |= - (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + + // Update the memory + auto x_diff = x_new - x_old; + auto grad_diff = grad_new - grad_old; + + delta_grad_memory_.col(n_iter_ % mem_size_) = grad_diff; + delta_x_memory_.col(n_iter_ % mem_size_) = x_diff; + + // Calculate the inverse Hessian approximation + initial_coeff = x_diff.dot(grad_diff) / grad_diff.dot(grad_diff); + + // Prepare next iteration + stop |= (internals::exec_grad_hooks(*this, objective, callbacks_) || internals::exec_stop_if(*this, objective)); + + error = grad_new.norm(); + values_.push_back(objective(x_new)); x_old = x_new; grad_old = grad_new; - error = grad_new.norm(); - values_.push_back(objective(x_old)); - } + ++n_iter_; + } + optimum_ = x_old; value_ = values_.back(); return optimum_; } - // observers + + /** + * @brief Returns the argmin of the last call to [optimize] + * + * @return vector_t + */ vector_t optimum() const { return optimum_; } + + /** + * @brief Returns the minimum of the last call to [optimize] + * + * @return 
double + */ double value() const { return value_; } + + /** + * @brief Returns the number of iterations performed by the method on the last call to [optimize] + * + * @return int + */ int n_iter() const { return n_iter_; } - const std::vector& values() const { return values_; } }; } // namespace fdapde diff --git a/fdaPDE/src/optimization/nelder_mead.h b/fdaPDE/src/optimization/nelder_mead.h index 3de00be5..fc67c0a7 100644 --- a/fdaPDE/src/optimization/nelder_mead.h +++ b/fdaPDE/src/optimization/nelder_mead.h @@ -21,171 +21,206 @@ namespace fdapde { +// Implementation of Nelder-Mead algorithm for gradient-free unconstrained nonlinear optimization template class NelderMead { - private: +private: using vector_t = std::conditional_t, Eigen::Matrix>; - using matrix_t = - std::conditional_t, Eigen::Matrix>; - - vector_t optimum_; - double value_; // objective value at optimum - int max_iter_; // maximum number of iterations before forced stop - double tol_; // tolerance on error before forced stop - int n_iter_ = 0; // current iteration number - int seed_; // employed for random centroid perturbation - double alpha_; // reflexion coeff - double beta_; // expension coeff - double gamma_; // outer contraction coeff - double delta_; // inner contraction coeff - - Eigen::Matrix simplex_; // simplex vertices - std::vector vertices_values_; // objective values at simplex vertices - std::vector vertices_rank_; // simplex vertices sorted from lower to higher objective value - public: - vector_t x_curr; - double obj_curr; + using matrix_t = std::conditional_t, Eigen::Matrix>; + + vector_t optimum_; // Argmin of the optimum + double value_; // objective value at optimum + int max_iter_ = 500; // maximum number of iterations before forced stop + int n_iter_ = 0; // current iteration number + double tol_ = 1e-5; // tolerance on error before forced stop + + matrix_t simplex_; // Edges of the simplex + vector_t vertices_values_; // Value of each edge + std::vector vertices_rank_; // Index into the simplex vector, sorted from best to worst + + double alpha_ = 0.0; // Reflexion coeff + double beta_ = 0.0; // Expension coeff + double gamma_ = 0.0; // Outer contraction coeff + double delta_ = 0.0; // Inner contraction coeff + +public: static constexpr bool gradient_free = true; static constexpr int static_input_size = N; + vector_t x_curr; + double obj_curr; + +public: // constructors NelderMead() = default; - NelderMead(int max_iter, double tol, int seed = fdapde::random_seed) : - max_iter_(max_iter), tol_(tol), seed_(seed) { } + + NelderMead(int max_iter, double tol): max_iter_(max_iter), tol_(tol) + {} template vector_t optimize(ObjectiveT&& objective, const vector_t& x0, Callbacks&&... callbacks) { fdapde_static_assert( std::is_same().operator()(vector_t())) FDAPDE_COMMA double>::value, INVALID_CALL_TO_OPTIMIZE__OBJECTIVE_FUNCTOR_NOT_ACCEPTING_VECTORTYPE); + fdapde_assert(x0.rows() > 0); + std::tuple callbacks_ {callbacks...}; - double dims = x0.rows(); + bool stop = false; - bool shrink = false; + bool require_shrink = false; n_iter_ = 0; - vector_t centroid; - // adaptive parameters initialization - // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters. Comput Optim Appl - // 51, 259–277 (2012). 
https://doi.org/10.1007/s10589-010-9329-3 + vector_t zero; + const int dimension = x0.rows(); + fdapde_assert(dimension >= 1); + if constexpr (N == Dynamic) { + zero = vector_t::Zero(x0.rows()); + } else { + zero = vector_t::Zero(); + } + vector_t centroid = zero; // centroid + vector_t xr = zero; // initial next guess + vector_t tmp = zero; // temporary vec for inner, outer contractions, etc. + + // Gao, F., Han, L. Implementing the Nelder-Mead simplex algorithm with adaptive parameters. + // Comput Optim Appl 51, 259–277 (2012). https://doi.org/10.1007/s10589-010-9329-3 alpha_ = 1.0; - beta_ = 1.0 + 2. / dims; - gamma_ = 0.75 - 1. / (2 * dims); - delta_ = 1.0 - 1. / dims; - // simplex initialization - // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search. Optim Lett 13, 847–856 - // (2019). https://doi.org/10.1007/s11590-018-1284-4 - double c_h = std::min(std::max(x0.template lpNorm(), 1.0), 10.0); - simplex_ = x0.replicate(1, dims + 1); - simplex_.block(0, 0, dims, dims) += vector_t::Constant(dims, c_h).asDiagonal(); - simplex_.col(dims).array() += c_h * (1.0 - std::sqrt(dims + 1)) / dims; - for (int i = 0; i < dims + 1; ++i) { - x_curr = simplex_.col(i); - obj_curr = objective(simplex_.col(i)); + beta_ = 1 + 2.0/(double)dimension; + gamma_ = 0.75 - 1.0/(double)(2*dimension); + delta_ = 1.0 - 1.0/(double)dimension; + + // Initialise the simplex given x0 + // Wessing, S. Proper initialization is crucial for the Nelder–Mead simplex search. + // Optim Lett 13, 847–856 (2019). https://doi.org/10.1007/s11590-018-1284-4 + { + double infnty_norm = x0.cwiseAbs().maxCoeff(); + double scale_factor = std::min(std::max(infnty_norm, 1.0), 10.0); + double last_coeff = scale_factor * (1.0 - (double)std::sqrt(dimension+1))/(double)(dimension); + + // Initialises the vectors with cached values + simplex_ = x0.rowwise().replicate(dimension+1); + vertices_values_ = vector_t::Constant(dimension+1, 1, std::numeric_limits::max()); + vertices_rank_.clear(); + vertices_rank_.resize(dimension+1, 0); + + for(int i = 0; i < dimension; ++i) { + simplex_(i, i) += scale_factor; + vertices_rank_[i+1] = i+1; + simplex_(i, dimension) += last_coeff; + } + } + + fdapde_assert(vertices_values_.size() == simplex_.cols()); + fdapde_assert(vertices_rank_.size() == simplex_.cols()); + fdapde_assert(x0.rows()+1 == simplex_.cols()); + + // Compute the vertices's values + for(int i = 0; i < simplex_.cols(); ++i) { + vertices_values_[i] = objective(simplex_.col(i)); stop |= internals::exec_eval_hooks(*this, objective, callbacks_); - vertices_values_.push_back(obj_curr); } - for (int i = 0; i < dims + 1; ++i) { vertices_rank_.push_back(i); } - // sort vertices according to their objective value + + // Sort the vertices according to their objective value std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) { return vertices_values_[a] < vertices_values_[b]; }); - - while (n_iter_ < max_iter_ && !stop) { - // centroid calculation (using the dims best vertices) - centroid.setZero(); - for (int i = 0; i < dims; ++i) { centroid += simplex_.col(vertices_rank_[i]); } - centroid /= dims; - // Fajfar, I., Bűrmen, Á. & Puhan, J. The Nelder–Mead simplex algorithm with perturbed centroid for - // high-dimensional function optimization. Optim Lett 13, 1011–1025 (2019). 
- // https://doi.org/10.1007/s11590-018-1306-2 - vector_t perturbed_centroid = centroid; - if (dims >= 10) { - vector_t random_delta = internals::gaussian_matrix(1, dims, 1.0, seed_); - random_delta /= random_delta.norm(); - perturbed_centroid += - 0.1 * random_delta * (simplex_.col(vertices_rank_[0]) - simplex_.col(vertices_rank_[dims])).norm(); + + while(!stop && n_iter_ < max_iter_) { + // Centroid calculation + centroid.setZero(); // centroid of the [dimension] best vertices + for(int i = 0; i < dimension; ++i) { + centroid += simplex_.col(vertices_rank_[i]); } - vector_t xr = perturbed_centroid + alpha_ * (perturbed_centroid - simplex_.col(vertices_rank_[dims])); - - // simplex update - shrink = false; - double xr_val = objective(xr); // (perturbed) centroid objective value - double fb_val = vertices_values_[vertices_rank_[0]]; - double fw_val = vertices_values_[vertices_rank_[dims]]; - double sw_val = vertices_values_[vertices_rank_[dims - 1]]; - x_curr = xr; - obj_curr = xr_val; + centroid /= (double)(dimension); + + xr = centroid + alpha_*(centroid - simplex_.col(vertices_rank_[dimension])); + + // Cache the values used for the if statements + require_shrink = false; + const double xr_val = objective(xr); + const double best_val = vertices_values_[vertices_rank_[0]]; + const double worst_val = vertices_values_[vertices_rank_[dimension]]; + const double second_worst_val = vertices_values_[vertices_rank_[dimension-1]]; + stop |= internals::exec_eval_hooks(*this, objective, callbacks_); - - if (fb_val <= xr_val && xr_val < sw_val) { - // reflection - simplex_.col(vertices_rank_[dims]) = xr; - vertices_values_[vertices_rank_[dims]] = xr_val; - } else if (xr_val < fb_val) { - // expansion - vector_t xe = perturbed_centroid + beta_ * (xr - perturbed_centroid); - double xe_val = objective(xe); - x_curr = xe; - obj_curr = xe_val; + + // Compute the new simplex + if( best_val <= xr_val && xr_val < second_worst_val ) { + // Reflexion + simplex_.col(vertices_rank_[dimension]) = xr; + vertices_values_[vertices_rank_[dimension]] = xr_val; + } else if( xr_val < best_val ) { + // Expansion + tmp = centroid + beta_*(xr - centroid); + double xe_val = objective(tmp); stop |= internals::exec_eval_hooks(*this, objective, callbacks_); - simplex_.col(vertices_rank_[dims]) = xe_val < xr_val ? xe : xr; - vertices_values_[vertices_rank_[dims]] = xe_val < xr_val ? 
xe_val : xr_val; - } else if (sw_val <= xr_val && xr_val < fw_val) { - // outer contraction - vector_t xoc = centroid + gamma_ * (xr - centroid); - double xoc_val = objective(xoc); - x_curr = xoc; - obj_curr = xoc_val; + if(xe_val < xr_val) { + simplex_.col(vertices_rank_[dimension]) = tmp; + vertices_values_[vertices_rank_[dimension]] = xe_val; + } else { + simplex_.col(vertices_rank_[dimension]) = xr; + vertices_values_[vertices_rank_[dimension]] = xr_val; + } + } else if( second_worst_val <= xr_val && xr_val < worst_val ) { + // Outer Contraction + tmp = centroid + gamma_ * (xr - centroid); + double xoc_val = objective(tmp); stop |= internals::exec_eval_hooks(*this, objective, callbacks_); if (xoc_val <= xr_val) { - simplex_.col(vertices_rank_[dims]) = xoc; - vertices_values_[vertices_rank_[dims]] = xoc_val; + simplex_.col(vertices_rank_[dimension]) = tmp; + vertices_values_[vertices_rank_[dimension]] = xoc_val; } else { - shrink = true; + require_shrink = true; } } else { - // inner contraction - vector_t xic = centroid - gamma_ * (xr - centroid); - double xic_val = objective(xic); - x_curr = xic; - obj_curr = xic_val; + // Inner Contraction + tmp = centroid - gamma_ * (xr - centroid); + double xic_val = objective(tmp); stop |= internals::exec_eval_hooks(*this, objective, callbacks_); - if (xic_val < fb_val) { - simplex_.col(vertices_rank_[dims]) = xic; - vertices_values_[vertices_rank_[dims]] = xic_val; + if( xic_val < best_val) { + simplex_.col(vertices_rank_[dimension]) = tmp; + vertices_values_[vertices_rank_[dimension]] = xic_val; } else { - shrink = true; + require_shrink = true; } } - // shrink - if (shrink) { - auto best_vertex = simplex_.col(vertices_rank_[0]); - for (int i = 1; i < dims + 1; ++i) { - simplex_.col(vertices_rank_[i]) = - best_vertex + delta_ * (simplex_.col(vertices_rank_[i]) - best_vertex); + + // Shrink + if(require_shrink) { + const auto &best_vertex = simplex_.col(vertices_rank_[0]); + for(int i = 1; i < dimension + 1; ++i) { + simplex_.col(vertices_rank_[i]) = best_vertex + delta_ * (simplex_.col(vertices_rank_[i]) - best_vertex); + vertices_values_[vertices_rank_[i]] = objective(simplex_.col(vertices_rank_[i])); + stop |= internals::exec_eval_hooks(*this, objective, callbacks_); } } - // prepare next iteration - double mean = std::accumulate(vertices_values_.begin(), vertices_values_.end(), 0.0) / (dims + 1); - double std_dev = - std::accumulate(vertices_values_.begin(), vertices_values_.end(), 0.0, [&](double a, double b) { - return a + (std::pow(b - mean, 2)); - }); - std_dev = std::sqrt(std_dev / dims); - stop = std_dev < tol_; - // sort vertices according to their objective value + // Sort the vertices according to their objective value std::sort(vertices_rank_.begin(), vertices_rank_.end(), [&](int a, int b) { return vertices_values_[a] < vertices_values_[b]; }); - stop |= internals::exec_stop_if(*this, objective); - n_iter_++; + + // Stopping criterion + stop |= internals::exec_stop_if(*this, objective); + + // std::accumulate is avoided here because it can become + // very slow depending on the container and formula + const double value_mean = vertices_values_.mean(); + double value_var = 0.0; + for(double val: vertices_values_) { + const double x = val - value_mean; + value_var += x*x; + } + value_var /= (double)vertices_values_.rows(); + + stop |= std::sqrt(value_var) <= tol_; + ++n_iter_; } + optimum_ = simplex_.col(vertices_rank_[0]); value_ = objective(optimum_); return optimum_; } - // getters - const vector_t& optimum() const { return
optimum_; } + + vector_t optimum() const { return optimum_; } double value() const { return value_; } int n_iter() const { return n_iter_; } }; diff --git a/fdaPDE/src/optimization/wolfe.h b/fdaPDE/src/optimization/wolfe.h index be861918..28251c1f 100644 --- a/fdaPDE/src/optimization/wolfe.h +++ b/fdaPDE/src/optimization/wolfe.h @@ -25,23 +25,25 @@ namespace fdapde { // wolfe line search method for adaptive step size class WolfeLineSearch { private: - static constexpr int max_iter_ = 10; + int max_iter_ = 100; double alpha_ = 1.0; double alpha_max_ = std::numeric_limits::infinity(), alpha_min_ = 0; double c1_ = 1e-4, c2_ = 0.9; public: WolfeLineSearch() = default; - WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2) { } - + WolfeLineSearch(double alpha, double c1, double c2) : alpha_(alpha), c1_(c1), c2_(c2){ } + WolfeLineSearch(double alpha, double c1, double c2, int max_iter) : alpha_(alpha), c1_(c1), c2_(c2), max_iter_(max_iter) { } + WolfeLineSearch(int max_iter) : max_iter_(max_iter) { } + // bisection method for the weak Wolfe conditions. check "Jorge Nocedal, Stephen J. Wright (2006), Numerical - // Optimization, pag 58" + // Optimization, page 58 template bool adapt_hook(Opt& opt, Obj& obj) { fdapde_static_assert(is_gradient_based_opt_v, THIS_METHOD_IS_FOR_GRADIENT_BASED_OPTIMIZATION_ONLY); - double grad_0 = opt.grad_old.dot(opt.update), val_0 = obj(opt.x_old); + const double grad_0 = opt.grad_old.dot(opt.update), val_0 = obj(opt.x_old); auto grad = obj.gradient(); bool zooming = false; - double c1 = c1_, c2 = c2_; + const double c1 = c1_, c2 = c2_; double alpha_max = alpha_max_, alpha_min = alpha_min_; double alpha_prev = 0; double alpha_curr = alpha_;
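Since both the reworked L-BFGS and the extended WolfeLineSearch appear in this changeset, a brief sketch of how they compose may help: the line search exposes adapt_hook(opt, obj), which exec_adapt_hooks invokes just before each parameter update so that the step length h is adapted rather than kept fixed. The Himmelblau objective and the include path below are assumptions; the interface requirements visible in the code are an operator()(vector) returning double and a gradient() member returning a callable.

#include <Eigen/Dense>
#include <cmath>
#include <fdaPDE/optimization.h>   // assumed include path

// Himmelblau's function, a standard smooth test problem -- illustrative only
struct himmelblau {
    double operator()(const Eigen::Matrix<double, 2, 1>& x) const {
        return std::pow(x[0] * x[0] + x[1] - 11.0, 2) + std::pow(x[0] + x[1] * x[1] - 7.0, 2);
    }
    auto gradient() const {
        return [](const Eigen::Matrix<double, 2, 1>& x) {
            return Eigen::Matrix<double, 2, 1>(
              4.0 * x[0] * (x[0] * x[0] + x[1] - 11.0) + 2.0 * (x[0] + x[1] * x[1] - 7.0),
              2.0 * (x[0] * x[0] + x[1] - 11.0) + 4.0 * x[1] * (x[0] + x[1] * x[1] - 7.0));
        };
    }
};

int main() {
    // max_iter, tol, step, memory_size
    fdapde::LBFGS<2> lbfgs(500, 1e-6, 1e-2, 10);
    // alpha, c1, c2, max_iter: the new fourth argument bounds the bisection iterations of the line search
    fdapde::WolfeLineSearch line_search(1.0, 1e-4, 0.9, 100);
    lbfgs.optimize(himmelblau {}, Eigen::Matrix<double, 2, 1>(0.0, 0.0), line_search);
    // lbfgs.optimum(), lbfgs.value() and lbfgs.n_iter() report the result
    return 0;
}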