diff --git a/HISTORY.md b/HISTORY.md
index e6a04fb3c..89ac9b4ec 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -35,6 +35,9 @@
   * Fix CMake package export
     ([#198](https://github.com/mlpack/ensmallen/pull/198)).
 
+  * Fix CMA-ES inconsistencies
+    ([#193](https://github.com/mlpack/ensmallen/pull/193)).
+
   * Allow early stop callback to accept a lambda function
     ([#165](https://github.com/mlpack/ensmallen/pull/165)).
 
@@ -55,7 +58,7 @@
     ([#183](https://github.com/mlpack/ensmallen/pull/183)).
 
   * Remove deprecated methods from PrimalDualSolver implementation
-    ([#185](https://github.com/mlpack/ensmallen/pull/185).
+    ([#185](https://github.com/mlpack/ensmallen/pull/185)).
 
   * Update logo
     ([#186](https://github.com/mlpack/ensmallen/pull/186)).
diff --git a/doc/optimizers.md b/doc/optimizers.md
index a99151421..a71022a5a 100644
--- a/doc/optimizers.md
+++ b/doc/optimizers.md
@@ -584,6 +584,7 @@ matrix within an iterative procedure using the covariance matrix.
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound`_`)`
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize`_`)`
  * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize, maxIterations, tolerance, selectionPolicy`_`)`
+ * `CMAES<`_`SelectionPolicyType`_`>(`_`lambda, lowerBound, upperBound, batchSize, maxIterations, tolerance, selectionPolicy, initialSigma`_`)`
 
 The _`SelectionPolicyType`_ template parameter refers to the strategy used to
 compute the (approximate) objective function. The `FullSelection` and
@@ -606,10 +607,11 @@ For convenience the following types can be used:
 | `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` |
 | `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm.
| `1e-5` | | `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` | +| `double` | **`initialSigma`** | The initial step size. | `0.6` | Attributes of the optimizer may also be changed via the member methods `Lambda()`, `LowerBound()`, `UpperBound()`, `BatchSize()`, `MaxIterations()`, -`Tolerance()`, and `SelectionPolicy()`. +`Tolerance()`, `SelectionPolicy()` and `InitialSigma()`. The `selectionPolicy` attribute allows an instantiated `SelectionPolicyType` to be given. The `FullSelection` policy has no need to be instantiated and thus diff --git a/include/ensmallen_bits/cmaes/cmaes.hpp b/include/ensmallen_bits/cmaes/cmaes.hpp index 9ee2efc0c..a0e059089 100644 --- a/include/ensmallen_bits/cmaes/cmaes.hpp +++ b/include/ensmallen_bits/cmaes/cmaes.hpp @@ -66,6 +66,7 @@ class CMAES * @param maxIterations Maximum number of iterations allowed (0 means no * limit). * @param tolerance Maximum absolute tolerance to terminate algorithm. + * @param initialSigma The initial step size. * @param selectionPolicy Instantiated selection policy used to calculate the * objective. */ @@ -75,7 +76,8 @@ class CMAES const size_t batchSize = 32, const size_t maxIterations = 1000, const double tolerance = 1e-5, - const SelectionPolicyType& selectionPolicy = SelectionPolicyType()); + const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), + const double initialSigma = 0.6); /** * Optimize the given function using CMA-ES. The given starting point will be @@ -97,9 +99,9 @@ class CMAES MatType& iterate, CallbackTypes&&... callbacks); - //! Get the step size. + //! Get the population size. size_t PopulationSize() const { return lambda; } - //! Modify the step size. + //! Modify the population size. size_t& PopulationSize() { return lambda; } //! Get the lower bound of decision variables. @@ -107,9 +109,9 @@ class CMAES //! Modify the lower bound of decision variables. 
double& LowerBound() { return lowerBound; } - //! Get the upper bound of decision variables + //! Get the upper bound of decision variables. double UpperBound() const { return upperBound; } - //! Modify the upper bound of decision variables + //! Modify the upper bound of decision variables. double& UpperBound() { return upperBound; } //! Get the batch size. @@ -132,6 +134,11 @@ class CMAES //! Modify the selection policy. SelectionPolicyType& SelectionPolicy() { return selectionPolicy; } + //! Get the initial step size. + double InitialSigma() const { return initialSigma; } + //! Modify the initial step size. + double& InitialSigma() { return initialSigma; } + private: //! Population size. size_t lambda; @@ -139,7 +146,7 @@ class CMAES //! Lower bound of decision variables. double lowerBound; - //! Upper bound of decision variables + //! Upper bound of decision variables. double upperBound; //! The batch size for processing. @@ -153,6 +160,15 @@ class CMAES //! The selection policy used to calculate the objective. SelectionPolicyType selectionPolicy; + + //! Initial step size. + double initialSigma; + + //! Methods used to transform the candidates into the constraints. + template + void BoundaryTransform(BaseMatType& matrix); + template + void BoundaryTransformInverse(BaseMatType& matrix); }; /** diff --git a/include/ensmallen_bits/cmaes/cmaes_impl.hpp b/include/ensmallen_bits/cmaes/cmaes_impl.hpp index 32a05dde9..88db35623 100644 --- a/include/ensmallen_bits/cmaes/cmaes_impl.hpp +++ b/include/ensmallen_bits/cmaes/cmaes_impl.hpp @@ -17,6 +17,7 @@ // In case it hasn't been included yet. 
#include "cmaes.hpp" +#include #include @@ -29,15 +30,20 @@ CMAES::CMAES(const size_t lambda, const size_t batchSize, const size_t maxIterations, const double tolerance, - const SelectionPolicyType& selectionPolicy) : + const SelectionPolicyType& selectionPolicy, + const double initialSigma) : lambda(lambda), lowerBound(lowerBound), upperBound(upperBound), batchSize(batchSize), maxIterations(maxIterations), tolerance(tolerance), - selectionPolicy(selectionPolicy) -{ /* Nothing to do. */ } + selectionPolicy(selectionPolicy), + initialSigma(initialSigma) +{ + assert(this->lowerBound != this->upperBound && "The values of " + "lower bound and upper bound must be different."); +} //! Optimize the function (minimize). template @@ -78,7 +84,7 @@ typename MatType::elem_type CMAES::Optimize( // Step size control parameters. BaseMatType sigma(2, 1); // sigma is vector-shaped. - sigma(0) = 0.3 * (upperBound - lowerBound); + sigma(0) = initialSigma; const double cs = (muEffective + 2) / (iterate.n_elem + muEffective + 5); const double ds = 1 + cs + 2 * std::max(std::sqrt((muEffective - 1) / (iterate.n_elem + 1)) - 1, 0.0); @@ -99,13 +105,16 @@ typename MatType::elem_type CMAES::Optimize( std::vector mPosition(2, BaseMatType(iterate.n_rows, iterate.n_cols)); - mPosition[0] = lowerBound + arma::randu( - iterate.n_rows, iterate.n_cols) * (upperBound - lowerBound); + BaseMatType initialVal; + initialVal.randu(iterate.n_rows, iterate.n_cols); + initialVal += (BaseMatType)(iterateIn); + mPosition[0] = initialVal; BaseMatType step(iterate.n_rows, iterate.n_cols); step.zeros(); // Calculate the first objective function. + BoundaryTransform(mPosition[0]); ElemType currentObjective = 0; for (size_t f = 0; f < numFunctions; f += batchSize) { @@ -177,6 +186,7 @@ typename MatType::elem_type CMAES::Optimize( pPosition[idx(j)] = mPosition[idx0] + sigma(idx0) * pStep[idx(j)]; // Calculate the objective function. 
+      BoundaryTransform(pPosition[idx(j)]);
       pObjective(idx(j)) = selectionPolicy.Select(function, batchSize,
           pPosition[idx(j)], callbacks...);
     }
@@ -191,6 +201,7 @@ typename MatType::elem_type CMAES::Optimize(
     mPosition[idx1] = mPosition[idx0] + sigma(idx0) * step;
 
     // Calculate the objective function.
+    BoundaryTransform(mPosition[idx1]);
     currentObjective = selectionPolicy.Select(function, batchSize,
         mPosition[idx1], callbacks...);
 
@@ -313,6 +324,103 @@ typename MatType::elem_type CMAES::Optimize(
   return overallObjective;
 }
 
+// Transforms the candidate into the given bounds.
+//
+// Every element of the given matrix is mapped, in place, into the interval
+// [lowerBound, upperBound].  An element is first shifted by whole periods
+// (r = xup - xlow) into [xlow, xup], then mirrored into
+// [lowerBound - al, upperBound + au], and finally the margins of width al and
+// au around each bound are folded into the feasible interval by a quadratic
+// map.  The constructor asserts lowerBound != upperBound, which keeps al and
+// au non-zero for the divisions below.
+//
+// @param matrix Candidate solution; overwritten with the transformed values.
+template<typename SelectionPolicyType>
+template<typename BaseMatType>
+void CMAES<SelectionPolicyType>::BoundaryTransform(BaseMatType& matrix)
+{
+  typedef typename BaseMatType::elem_type ElemType;
+  const double diff = (upperBound - lowerBound) / 2.0;
+  const double al = std::min(diff, (1 + std::abs(lowerBound)) / 20.0);
+  const double au = std::min(diff, (1 + std::abs(upperBound)) / 20.0);
+  const double xlow = lowerBound - 2 * al - diff;
+  const double xup = upperBound + 2 * au + diff;
+  // Period of the transformation (equal to xup - xlow).
+  const double r = 2 * (upperBound - lowerBound + al + au);
+
+  for (size_t col = 0; col < matrix.n_cols; col++)
+  {
+    for (size_t row = 0; row < matrix.n_rows; row++)
+    {
+      // Work in double precision and convert back once at the end.
+      double y = matrix(row, col);
+
+      // Shift by a whole number of periods into [xlow, xup].  The floor is
+      // essential: without it the shift collapses to the constant xlow + r
+      // (or xup - r) regardless of y.
+      if (y < xlow)
+        y += r * (1 + std::floor((xlow - y) / r));
+      if (y > xup)
+        y -= r * (1 + std::floor((y - xup) / r));
+
+      // Mirror into [lowerBound - al, upperBound + au].  These checks are
+      // deliberately not chained to the shift above, since a shifted value
+      // may still need mirroring.
+      if (y < lowerBound - al)
+        y += 2 * (lowerBound - al - y);
+      if (y > upperBound + au)
+        y -= 2 * (y - upperBound - au);
+
+      // Boundary transformation: quadratic near the bounds, identity
+      // elsewhere.
+      if (y < lowerBound + al)
+      {
+        const double d = y - (lowerBound - al);
+        y = lowerBound + d * d / 4.0 / al;
+      }
+      else if (y > upperBound - au)
+      {
+        const double d = y - (upperBound + au);
+        y = upperBound - d * d / 4.0 / au;
+      }
+
+      matrix(row, col) = static_cast<ElemType>(y);
+    }
+  }
+}
+
+// Computes the inverse of the transformation.
+template<typename SelectionPolicyType>
+template<typename BaseMatType>
+void CMAES<SelectionPolicyType>::BoundaryTransformInverse(BaseMatType& matrix)
+{
+  // Undoes the quadratic part of the boundary transformation, in place; only
+  // elements within al (or au) of a bound are changed.
+  typedef typename BaseMatType::elem_type ElemType;
+  const double diff = (upperBound - lowerBound) / 2.0;
+  const double al = std::min(diff, (1 + std::abs(lowerBound)) / 20.0);
+  const double au = std::min(diff, (1 + std::abs(upperBound)) / 20.0);
+
+  for (size_t col = 0; col < matrix.n_cols; col++)
+  {
+    for (size_t row = 0; row < matrix.n_rows; row++)
+    {
+      // Compute in double precision and convert back once at the end.
+      double y = matrix(row, col);
+
+      // The abs() keeps the square root argument non-negative.
+      if (y < lowerBound + al)
+        y = (lowerBound - al) +
+            2 * std::sqrt(std::abs(al * (y - lowerBound)));
+      else if (y > upperBound - au)
+        y = (upperBound + au) -
+            2 * std::sqrt(std::abs(au * (upperBound - y)));
+
+      matrix(row, col) = static_cast<ElemType>(y);
+    }
+  }
+}
+
 } // namespace ens
 
 #endif
diff --git a/tests/cmaes_test.cpp b/tests/cmaes_test.cpp
index 45c7f1722..bbbf193ba 100644
--- a/tests/cmaes_test.cpp
+++ b/tests/cmaes_test.cpp
@@ -50,7 +50,7 @@ TEST_CASE("CMAESLogisticRegressionTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<> lr(shuffledData, shuffledResponses, 0.5);
 
-    CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    CMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::mat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -85,7 +85,7 @@ TEST_CASE("ApproxCMAESLogisticRegressionTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression<> lr(shuffledData, shuffledResponses, 0.5);
 
-    ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    ApproxCMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::mat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -120,7 +120,7 @@ TEST_CASE("CMAESLogisticRegressionFMatTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression lr(shuffledData, shuffledResponses, 0.5);
 
-    CMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    CMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::fmat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr,
coordinates);
 
@@ -155,7 +155,7 @@ TEST_CASE("ApproxCMAESLogisticRegressionFMatTest", "[CMAESTest]")
         responses, testResponses, shuffledResponses);
     LogisticRegression lr(shuffledData, shuffledResponses, 0.5);
 
-    ApproxCMAES<> cmaes(0, -1, 1, 32, 200, 1e-3);
+    ApproxCMAES<> cmaes(0, -20, 20, 32, 200, 1e-3);
     arma::fmat coordinates = lr.GetInitialPoint();
     cmaes.Optimize(lr, coordinates);
 
@@ -172,3 +172,39 @@ TEST_CASE("ApproxCMAESLogisticRegressionFMatTest", "[CMAESTest]")
 
   REQUIRE(success == true);
 }
+
+/**
+ * Test to check if the boundary transformation works as expected: every
+ * coordinate returned by the optimizer must lie within the given bounds.
+ */
+TEST_CASE("BoundaryConditionTestFunction", "[CMAESTest]")
+{
+  const double lowerBound = -10;
+  const double upperBound = 10;
+  const size_t trials = 3;
+  for (size_t trial = 0; trial < trials; ++trial)
+  {
+    arma::mat data, testData, shuffledData;
+    arma::Row<size_t> responses, testResponses, shuffledResponses;
+
+    LogisticRegressionTestData(data, testData, shuffledData,
+        responses, testResponses, shuffledResponses);
+    LogisticRegression<> lr(shuffledData, shuffledResponses, 0.5);
+
+    // NOTE(review): the negative tolerance presumably keeps the optimizer
+    // from stopping before maxIterations -- confirm against Optimize().
+    CMAES<> optimizer(0, lowerBound, upperBound, 32, 200, -1);
+    arma::mat coordinates = lr.GetInitialPoint();
+    optimizer.Optimize(lr, coordinates);
+
+    for (size_t col = 0; col < coordinates.n_cols; col++)
+    {
+      for (size_t row = 0; row < coordinates.n_rows; row++)
+      {
+        const bool success = coordinates(row, col) <= upperBound &&
+            coordinates(row, col) >= lowerBound;
+        REQUIRE(success == true);
+      }
+    }
+  }
+}