From 16672124194786ce058ca63336e99c37b2295088 Mon Sep 17 00:00:00 2001
From: Gabriel Kronberger <gabriel.kronberger@heuristiclab.com>
Date: Thu, 1 Feb 2024 09:50:28 +0100
Subject: [PATCH 1/4] Use general ('g') format for coefficients in infix
 formatter.

---
 source/formatter/infix.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/source/formatter/infix.cpp b/source/formatter/infix.cpp
index cdbbf3b..d96daa8 100644
--- a/source/formatter/infix.cpp
+++ b/source/formatter/infix.cpp
@@ -10,10 +10,10 @@ auto InfixFormatter::FormatNode(Tree const& tree, Operon::Map<Operon::Hash, std:
 {
     const auto& s = tree[i];
     if (s.IsConstant()) {
-        auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "({{:.{}f}})" : "{{:.{}f}}"), decimalPrecision);
+        auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "({{:.{}g}})" : "{{:.{}g}}"), decimalPrecision);
         fmt::format_to(std::back_inserter(current), fmt::runtime(formatString), s.Value);
     } else if (s.IsVariable()) {
-        auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "(({{:.{}f}}) * {{}})" : "({{:.{}f}} * {{}})"), decimalPrecision);
+        auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "(({{:.{}g}}) * {{}})" : "({{:.{}g}} * {{}})"), decimalPrecision);
         if (auto it = variableNames.find(s.HashValue); it != variableNames.end()) {
             fmt::format_to(std::back_inserter(current), fmt::runtime(formatString), s.Value, it->second);
         } else {
@@ -22,7 +22,7 @@ auto InfixFormatter::FormatNode(Tree const& tree, Operon::Map<Operon::Hash, std:
     } else {
         if (s.Value != 1) {
             fmt::format_to(std::back_inserter(current), "(");
-            auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "({{:.{}f}})" : "{{:.{}f}}"), decimalPrecision);
+            auto formatString = fmt::format(fmt::runtime(s.Value < 0 ? "({{:.{}g}})" : "{{:.{}g}}"), decimalPrecision);
             fmt::format_to(std::back_inserter(current), fmt::runtime(formatString), s.Value);
             fmt::format_to(std::back_inserter(current), " * ");
         }
@@ -131,4 +131,4 @@ auto InfixFormatter::Format(Tree const& tree, Operon::Map<Operon::Hash, std::str
     return { result.begin(), result.end() };
 }
 
-} // namespace Operon
\ No newline at end of file
+} // namespace Operon

From ae473214ef86638e418d1fb6a4ca6405602010ff Mon Sep 17 00:00:00 2001
From: Gabriel Kronberger <gabriel.kronberger@heuristiclab.com>
Date: Thu, 1 Feb 2024 09:51:23 +0100
Subject: [PATCH 2/4] Increase number of digits for coefficients in infix
 format from 6 to 8

---
 cli/source/operon_gp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/source/operon_gp.cpp b/cli/source/operon_gp.cpp
index 687d17c..3c21f34 100644
--- a/cli/source/operon_gp.cpp
+++ b/cli/source/operon_gp.cpp
@@ -383,7 +383,7 @@ auto main(int argc, char** argv) -> int
         };
 
         gp.Run(executor, random, report);
-        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 6));
+        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8));
     } catch (std::exception& e) {
         fmt::print(stderr, "error: {}\n", e.what());
         return EXIT_FAILURE;

From ddd1bf6c12ff84cc2cb763f21b34047aaadde6d1 Mon Sep 17 00:00:00 2001
From: Gabriel Kronberger <gabriel.kronberger@heuristiclab.com>
Date: Thu, 1 Feb 2024 09:53:21 +0100
Subject: [PATCH 3/4] Print all individuals (without scaling) in the first
 front as a final step in NSGP.

---
 cli/source/operon_nsgp.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/cli/source/operon_nsgp.cpp b/cli/source/operon_nsgp.cpp
index ac3da60..b9165b2 100644
--- a/cli/source/operon_nsgp.cpp
+++ b/cli/source/operon_nsgp.cpp
@@ -396,7 +396,18 @@ auto main(int argc, char** argv) -> int
         };
 
         gp.Run(executor, random, report);
-        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), std::numeric_limits<Operon::Scalar>::digits));
+	fmt::print("Best individual:\n");
+        fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8));
+
+        auto const& pop = gp.Parents();
+
+	// print all solutions in the first front
+	fmt::print("All individuals in the Pareto front:\n");
+	for(auto ind = pop.begin(); ind < pop.end(); ind++) {
+	  if(ind->Rank == 0) {
+            fmt::print("{}\n", Operon::InfixFormatter::Format(ind->Genotype, problem.GetDataset(), 8));
+	  }
+	}
     } catch (std::exception& e) {
         fmt::print(stderr, "error: {}\n", e.what());
         return EXIT_FAILURE;

From 3934ba96b878f521cdd656cfed7d87714876d1c6 Mon Sep 17 00:00:00 2001
From: Gabriel Kronberger <gabriel.kronberger@heuristiclab.com>
Date: Thu, 4 Apr 2024 13:35:25 +0200
Subject: [PATCH 4/4] Add command line option to produce Pareto front, scale
 all individuals in the Pareto front, and apply scaling for the reports only
 when linear-scaling is turned on.

---
 cli/source/operon_nsgp.cpp | 91 ++++++++++++++++++++++++--------------
 cli/source/util.cpp        |  1 +
 2 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/cli/source/operon_nsgp.cpp b/cli/source/operon_nsgp.cpp
index 9a82745..04a6617 100644
--- a/cli/source/operon_nsgp.cpp
+++ b/cli/source/operon_nsgp.cpp
@@ -36,6 +36,9 @@
 #include "util.hpp"
 #include "operator_factory.hpp"
 
+void Scale(Operon::Individual& ind, Operon::Span<float const> estimated, Operon::Span<float const> target, Operon::Scalar& a, Operon::Scalar& b); // forward declaration; defined after main() in this file
+
+
 auto main(int argc, char** argv) -> int
 {
     auto opts = Operon::InitOptions("operon_gp", "Genetic programming symbolic regression");
@@ -300,23 +303,8 @@ auto main(int argc, char** argv) -> int
             Operon::Scalar a{1.0};
             Operon::Scalar b{0.0};
             auto linearScaling = taskflow.emplace([&]() {
-                auto [a_, b_] = Operon::FitLeastSquares(estimatedTrain, targetTrain);
-                a = static_cast<Operon::Scalar>(a_);
-                b = static_cast<Operon::Scalar>(b_);
-                // add scaling terms to the tree
-                auto& nodes = best.Genotype.Nodes();
-                auto const sz = nodes.size();
-                if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits<Operon::Scalar>::epsilon()) {
-                    nodes.emplace_back(Operon::Node::Constant(a));
-                    nodes.emplace_back(Operon::NodeType::Mul);
-                }
-                if (std::abs(b) > std::numeric_limits<Operon::Scalar>::epsilon()) {
-                    nodes.emplace_back(Operon::Node::Constant(b));
-                    nodes.emplace_back(Operon::NodeType::Add);
-                }
-                if (nodes.size() > sz) {
-                    best.Genotype.UpdateNodes();
-                }
+                if (scale)
+                    Scale(best, estimatedTrain, targetTrain, a, b);
             });
 
             double r2Train{};
@@ -327,13 +315,17 @@ auto main(int argc, char** argv) -> int
             double maeTest{};
 
             auto scaleTrain = taskflow.emplace([&]() {
-                Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTrain.data(), std::ssize(estimatedTrain));
-                estimated = estimated * a + b;
+                if (scale) {
+                    Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTrain.data(), std::ssize(estimatedTrain));
+                    estimated = estimated * a + b;
+                }
             });
 
             auto scaleTest = taskflow.emplace([&]() {
-                Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTest.data(), std::ssize(estimatedTest));
-                estimated = estimated * a + b;
+                if (scale) {
+                    Eigen::Map<Eigen::Array<Operon::Scalar, -1, 1>> estimated(estimatedTest.data(), std::ssize(estimatedTest));
+                    estimated = estimated * a + b;
+                }
             });
 
             auto calcStats = taskflow.emplace([&]() {
@@ -358,9 +350,13 @@ auto main(int argc, char** argv) -> int
             auto calculateOffMemory = taskflow.transform_reduce(off.begin(), off.end(), totalMemory, std::plus{}, [&](auto const& ind) { return getSize(ind); });
 
             // define task graph
-            linearScaling.succeed(evalTrain, evalTest);
-            linearScaling.precede(scaleTrain, scaleTest);
-            calcStats.succeed(scaleTrain, scaleTest);
+            // The wiring is the same with and without linear scaling:
+                linearScaling.succeed(evalTrain, evalTest);
+                linearScaling.precede(scaleTrain, scaleTest);
+                calcStats.succeed(scaleTrain, scaleTest);
+            // linearScaling, scaleTrain and scaleTest each check `scale` themselves and do
+            // nothing when it is false, so a separate calcStats.succeed(evalTrain, evalTest)
+            // branch for the unscaled case is not needed.
             calcStats.precede(calculateLength, calculateQuality, calculatePopMemory, calculateOffMemory);
 
             executor.corun(taskflow);
@@ -394,18 +390,27 @@ auto main(int argc, char** argv) -> int
         };
 
         gp.Run(executor, random, report);
-	fmt::print("Best individual:\n");
+        fmt::print("Best individual:\n");
         fmt::print("{}\n", Operon::InfixFormatter::Format(best.Genotype, problem.GetDataset(), 8));
 
         auto const& pop = gp.Parents();
 
-	// print all solutions in the first front
-	fmt::print("All individuals in the Pareto front:\n");
-	for(auto ind = pop.begin(); ind < pop.end(); ind++) {
-	  if(ind->Rank == 0) {
-            fmt::print("{}\n", Operon::InfixFormatter::Format(ind->Genotype, problem.GetDataset(), 8));
-	  }
-	}
+        // Optionally print every individual of the first front (Rank == 0). When linear
+        // scaling is enabled a scaled copy is printed; the population itself is not modified.
+        if (result["show-pareto-front"].as<bool>()) {
+            fmt::print("All individuals in the Pareto front:\n");
+            for (auto const& ind : pop) {
+                if (ind.Rank != 0) { continue; } // filter first, so only front members get copied
+                Operon::Individual cur = ind;
+                if (scale) {
+                    Operon::Scalar a{1.0};
+                    Operon::Scalar b{0.0};
+                    auto estimatedTrain = Operon::Interpreter<Operon::Scalar, Operon::DefaultDispatch>::Evaluate(cur.Genotype, problem.GetDataset(), trainingRange);
+                    Scale(cur, estimatedTrain, targetTrain, a, b);
+                }
+                fmt::print("{}\n", Operon::InfixFormatter::Format(cur.Genotype, problem.GetDataset(), 8));
+            }
+        }
     } catch (std::exception& e) {
         fmt::print(stderr, "error: {}\n", e.what());
         return EXIT_FAILURE;
@@ -413,3 +418,25 @@ auto main(int argc, char** argv) -> int
 
     return 0;
 }
+
+// Fits coefficients a, b with Operon::FitLeastSquares(estimated, target), writes them to the
+// out-parameters, and appends "* a" / "+ b" nodes to ind's tree unless a is ~1 / b is ~0.
+void Scale(Operon::Individual& ind, Operon::Span<float const> estimated, Operon::Span<float const> target, Operon::Scalar& a, Operon::Scalar& b) {
+    auto [a_, b_] = Operon::FitLeastSquares(estimated, target); // NOTE(review): spans are float but a/b and the callers' buffers are Operon::Scalar - confirm Scalar is always float
+    a = static_cast<Operon::Scalar>(a_);
+    b = static_cast<Operon::Scalar>(b_);
+    // add scaling terms to the tree (constant first, then the operator; presumably postfix order - confirm)
+    auto& nodes = ind.Genotype.Nodes();
+    auto const sz = nodes.size(); // remembered to detect whether anything gets appended
+    if (std::abs(a - Operon::Scalar{1}) > std::numeric_limits<Operon::Scalar>::epsilon()) { // skip the factor when a is within epsilon of 1
+        nodes.emplace_back(Operon::Node::Constant(a));
+        nodes.emplace_back(Operon::NodeType::Mul);
+    }
+    if (std::abs(b) > std::numeric_limits<Operon::Scalar>::epsilon()) { // skip the offset when b is within epsilon of 0
+        nodes.emplace_back(Operon::Node::Constant(b));
+        nodes.emplace_back(Operon::NodeType::Add);
+    }
+    if (nodes.size() > sz) { // call UpdateNodes() only when nodes were actually added
+        ind.Genotype.UpdateNodes();
+    }
+}
diff --git a/cli/source/util.cpp b/cli/source/util.cpp
index 3058150..0f4ad0d 100644
--- a/cli/source/util.cpp
+++ b/cli/source/util.cpp
@@ -183,6 +183,7 @@ auto InitOptions(std::string const& name, std::string const& desc, int width) ->
         ("disable-symbols", "Comma-separated list of disabled symbols ("+symbols+")", cxxopts::value<std::string>())
         ("symbolic", "Operate in symbolic mode - no coefficient tuning or coefficient mutation", cxxopts::value<bool>()->default_value("false"))
         ("show-primitives", "Display the primitive set used by the algorithm")
+        ("show-pareto-front", "Displays all expressions in the first Pareto front of the final generation", cxxopts::value<bool>()->default_value("false"))
         ("threads", "Number of threads to use for parallelism", cxxopts::value<size_t>()->default_value("0"))
         ("timelimit", "Time limit after which the algorithm will terminate", cxxopts::value<size_t>()->default_value(std::to_string(std::numeric_limits<size_t>::max())))
         ("debug", "Debug mode (more information displayed)")