From 16672124194786ce058ca63336e99c37b2295088 Mon Sep 17 00:00:00 2001 From: Gabriel Kronberger Date: Thu, 1 Feb 2024 09:50:28 +0100 Subject: [PATCH 1/4] Use 'scientific format' for coefficients in infix formatter. --- source/formatter/infix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/formatter/infix.cpp b/source/formatter/infix.cpp index cdbbf3b..d96daa8 100644 --- a/source/formatter/infix.cpp +++ b/source/formatter/infix.cpp @@ -10,10 +10,10 @@ auto InfixFormatter::FormatNode(Tree const& tree, Operon::Mapsecond); } else { @@ -22,7 +22,7 @@ auto InfixFormatter::FormatNode(Tree const& tree, Operon::Map Date: Thu, 1 Feb 2024 09:51:23 +0100 Subject: [PATCH 2/4] Increase number of digits for coefficients in infix format from 6 to 8 --- cli/source/operon_gp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/source/operon_gp.cpp b/cli/source/operon_gp.cpp index 687d17c..3c21f34 100644 --- a/cli/source/operon_gp.cpp +++ b/cli/source/operon_gp.cpp @@ -383,7 +383,7 @@ auto main(int argc, char** argv) -> int }; gp.Run(executor, random, report); - fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 6)); + fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8)); } catch (std::exception& e) { fmt::print(stderr, "error: {}\n", e.what()); return EXIT_FAILURE; From ddd1bf6c12ff84cc2cb763f21b34047aaadde6d1 Mon Sep 17 00:00:00 2001 From: Gabriel Kronberger Date: Thu, 1 Feb 2024 09:53:21 +0100 Subject: [PATCH 3/4] Print all individuals (without scaling) in the first front as a final step in NSGP. --- cli/source/operon_nsgp.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cli/source/operon_nsgp.cpp b/cli/source/operon_nsgp.cpp index ac3da60..b9165b2 100644 --- a/cli/source/operon_nsgp.cpp +++ b/cli/source/operon_nsgp.cpp @@ -396,7 +396,18 @@ auto main(int argc, char** argv) -> int }; gp.Run(executor, random, report); - fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), std::numeric_limits::digits)); + fmt::print("Best individual:\n"); + fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8)); + + auto const& pop = gp.Parents(); + + // print all solutions in the first front + fmt::print("All individuals in the Pareto front:\n"); + for(auto ind = pop.begin(); ind < pop.end(); ind++) { + if(ind->Rank == 0) { + fmt::print("{}\n", Operon::InfixFormatter::Format(ind->Genotype, problem.GetDataset(), 8)); + } + } } catch (std::exception& e) { fmt::print(stderr, "error: {}\n", e.what()); return EXIT_FAILURE; From 3934ba96b878f521cdd656cfed7d87714876d1c6 Mon Sep 17 00:00:00 2001 From: Gabriel Kronberger Date: Thu, 4 Apr 2024 13:35:25 +0200 Subject: [PATCH 4/4] Add command line option to produce Pareto front, scale all individuals in the Pareto front, and apply scaling for the reports only when linear-scaling is turned on. --- cli/source/operon_nsgp.cpp | 91 ++++++++++++++++++++++++-------------- cli/source/util.cpp | 1 + 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/cli/source/operon_nsgp.cpp b/cli/source/operon_nsgp.cpp index 9a82745..04a6617 100644 --- a/cli/source/operon_nsgp.cpp +++ b/cli/source/operon_nsgp.cpp @@ -36,6 +36,9 @@ #include "util.hpp" #include "operator_factory.hpp" +void Scale(Operon::Individual& ind, Operon::Span estimated, Operon::Span target, Operon::Scalar& a, Operon::Scalar& b); + + auto main(int argc, char** argv) -> int { auto opts = Operon::InitOptions("operon_gp", "Genetic programming symbolic regression"); @@ -300,23 +303,8 @@ auto main(int argc, char** argv) -> int Operon::Scalar a{1.0}; Operon::Scalar b{0.0}; auto linearScaling = taskflow.emplace([&]() { - auto [a_, b_] = Operon::FitLeastSquares(estimatedTrain, targetTrain); - a = static_cast(a_); - b = static_cast(b_); - // add scaling terms to the tree - auto& nodes = best.Genotype.Nodes(); - auto const sz = nodes.size(); - if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits::epsilon()) { - nodes.emplace_back(Operon::Node::Constant(a)); - nodes.emplace_back(Operon::NodeType::Mul); - } - if (std::abs(b) > std::numeric_limits::epsilon()) { - nodes.emplace_back(Operon::Node::Constant(b)); - nodes.emplace_back(Operon::NodeType::Add); - } - if (nodes.size() > sz) { - best.Genotype.UpdateNodes(); - } + if (scale) + Scale(best, estimatedTrain, targetTrain, a, b); }); double r2Train{}; @@ -327,13 +315,17 @@ auto main(int argc, char** argv) -> int double maeTest{}; auto scaleTrain = taskflow.emplace([&]() { - Eigen::Map> estimated(estimatedTrain.data(), std::ssize(estimatedTrain)); - estimated = estimated * a + b; + if (scale) { + Eigen::Map> estimated(estimatedTrain.data(), std::ssize(estimatedTrain)); + estimated = estimated * a + b; + } }); auto scaleTest = taskflow.emplace([&]() { - Eigen::Map> estimated(estimatedTest.data(), std::ssize(estimatedTest)); - estimated = estimated * a + b; + if (scale) { + Eigen::Map> estimated(estimatedTest.data(), std::ssize(estimatedTest)); + estimated = estimated * a + b; + } }); auto calcStats = taskflow.emplace([&]() { @@ -358,9 +350,13 @@ auto main(int argc, char** argv) -> int auto calculateOffMemory = taskflow.transform_reduce(off.begin(), off.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); }); // define task graph - linearScaling.succeed(evalTrain, evalTest); - linearScaling.precede(scaleTrain, scaleTest); - calcStats.succeed(scaleTrain, scaleTest); + //if (scale) { + linearScaling.succeed(evalTrain, evalTest); + linearScaling.precede(scaleTrain, scaleTest); + calcStats.succeed(scaleTrain, scaleTest); + //} else { + // calcStats.succeed(evalTrain, evalTest); + //} calcStats.precede(calculateLength, calculateQuality, calculatePopMemory, calculateOffMemory); executor.corun(taskflow); @@ -394,18 +390,27 @@ auto main(int argc, char** argv) -> int }; gp.Run(executor, random, report); - fmt::print("Best individual:\n"); + fmt::print("Best individual:\n"); fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8)); auto const& pop = gp.Parents(); - // print all solutions in the first front - fmt::print("All individuals in the Pareto front:\n"); - for(auto ind = pop.begin(); ind < pop.end(); ind++) { - if(ind->Rank == 0) { - fmt::print("{}\n", Operon::InfixFormatter::Format(ind->Genotype, problem.GetDataset(), 8)); - } - } + // print all solutions in the first front + if (result["show-pareto-front"].as()) { + fmt::print("All individuals in the Pareto front:\n"); + for(auto ind = pop.begin(); ind < pop.end(); ind++) { + Operon::Individual cur = *ind; + if(cur.Rank == 0) { + if (scale) { + Operon::Scalar a{1.0}; + Operon::Scalar b{0.0}; + auto estimatedTrain = Operon::Interpreter::Evaluate(cur.Genotype, problem.GetDataset(), trainingRange); + Scale(cur, estimatedTrain, targetTrain, a, b); + } + fmt::print("{}\n", Operon::InfixFormatter::Format(cur.Genotype, problem.GetDataset(), 8)); + } + } + } } catch (std::exception& e) { fmt::print(stderr, "error: {}\n", e.what()); return EXIT_FAILURE; @@ -413,3 +418,25 @@ auto main(int argc, char** argv) -> int return 0; } + + + +void Scale(Operon::Individual& ind, Operon::Span estimated, Operon::Span target, Operon::Scalar& a, Operon::Scalar& b) { + auto [a_, b_] = Operon::FitLeastSquares(estimated, target); + a = static_cast(a_); + b = static_cast(b_); + // add scaling terms to the tree + auto& nodes = ind.Genotype.Nodes(); + auto const sz = nodes.size(); + if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits::epsilon()) { + nodes.emplace_back(Operon::Node::Constant(a)); + nodes.emplace_back(Operon::NodeType::Mul); + } + if (std::abs(b) > std::numeric_limits::epsilon()) { + nodes.emplace_back(Operon::Node::Constant(b)); + nodes.emplace_back(Operon::NodeType::Add); + } + if (nodes.size() > sz) { + ind.Genotype.UpdateNodes(); + } +} diff --git a/cli/source/util.cpp b/cli/source/util.cpp index 3058150..0f4ad0d 100644 --- a/cli/source/util.cpp +++ b/cli/source/util.cpp @@ -183,6 +183,7 @@ auto InitOptions(std::string const& name, std::string const& desc, int width) -> ("disable-symbols", "Comma-separated list of disabled symbols ("+symbols+")", cxxopts::value()) ("symbolic", "Operate in symbolic mode - no coefficient tuning or coefficient mutation", cxxopts::value()->default_value("false")) ("show-primitives", "Display the primitive set used by the algorithm") + ("show-pareto-front", "Displays all expressions in the first Pareto front of the final generation", cxxopts::value()->default_value("false")) ("threads", "Number of threads to use for parallelism", cxxopts::value()->default_value("0")) ("timelimit", "Time limit after which the algorithm will terminate", cxxopts::value()->default_value(std::to_string(std::numeric_limits::max()))) ("debug", "Debug mode (more information displayed)")