From 40938c5b31a7f4a529aa7184c2d16449816f5b47 Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 14:22:08 +0200 Subject: [PATCH 1/9] should fix hidden-action problem for Q-learning, seems without easy fix for M-learning --- src/CMakeLists.txt | 2 +- src/MLearning.cpp | 12 +++++----- src/MLearning.h | 10 ++++----- src/QLearning.h | 12 +++++----- src/RefinementTree.cpp | 50 +++++++++++++++++++++++++++++------------- src/RefinementTree.h | 12 +++++----- src/SimpleMLearning.h | 8 +++---- 7 files changed, 64 insertions(+), 42 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7ac68d4..f53c93b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ project(PRLearn C CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_INCLUDE_CURRENT_DIR ON) -find_package(Boost 1.54 REQUIRED COMPONENTS headers REQUIRED) +find_package(Boost 1.54 REQUIRED) add_library(prlearn SHARED ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp) add_library(prlearnStatic STATIC ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index dae3256..541c7cf 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -1,24 +1,24 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: MLearning.cpp * Author: Peter G. Jensen - * + * * Created on July 25, 2017, 9:58 AM */ @@ -225,7 +225,7 @@ namespace prlearn { } } - void MLearning::update(const std::vector&, bool) + void MLearning::update(const std::vector&, bool) { } diff --git a/src/MLearning.h b/src/MLearning.h index 6c41223..53c9b61 100644 --- a/src/MLearning.h +++ b/src/MLearning.h @@ -1,22 +1,22 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: MLearning.h * Author: Peter G. Jensen * @@ -32,7 +32,7 @@ #include #include -namespace prlearn { +namespace prlearn { class MLearning { public: diff --git a/src/QLearning.h b/src/QLearning.h index 3617c2f..7d6e718 100644 --- a/src/QLearning.h +++ b/src/QLearning.h @@ -1,21 +1,21 @@ /* * Copyright Peter G. 
Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: QLearning.h * Author: Peter G. Jensen * @@ -46,6 +46,7 @@ namespace prlearn { void addSample(size_t dimen, // dimensions const double* f_val, const double* t_val, // source, destination-states + size_t* next_labels, size_t n_labels, size_t label, // action chosen, size_t dest, double value, // destination, cost const std::vector& clouds, // other points @@ -67,6 +68,7 @@ namespace prlearn { template void QLearning::addSample(size_t dimen, // dimensions const double* f_var, const double* t_var, // doubles + size_t* next_labels, size_t n_labels, // possible actions size_t label, size_t dest, double value, // cost const std::vector>&clouds, // other points bool minimization, const double delta, const propts_t& options) { @@ -74,7 +76,7 @@ namespace prlearn { auto toDone = 0.0; if (dest != 0 && options._discount != 0) - toDone = clouds[dest]._regressor.getBestQ(t_var, minimization); // 0 is a special sink-node. + toDone = clouds[dest]._regressor.getBestQ(t_var, minimization, next_labels, n_labels); // 0 is a special sink-node. auto nval = value; // if future is not a weird number, then add it (discounted) if (!std::isinf(toDone) && !std::isnan(toDone)) { diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index 158b9ab..b5604df 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -1,24 +1,24 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: RefinementTree.cpp * Author: Peter G. 
Jensen - * + * * Created on July 18, 2017, 5:09 PM */ @@ -62,7 +62,7 @@ namespace prlearn { } qvar_t - RefinementTree::lookup(size_t label, const double* point, size_t) const { + RefinementTree::lookup(size_t label, const double* point, size_t dimen) const { el_t lf(label); auto res = std::lower_bound(std::begin(_mapping), std::end(_mapping), lf); if (res == std::end(_mapping) || res->_label != label) @@ -73,17 +73,37 @@ namespace prlearn { return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance); } - double RefinementTree::getBestQ(const double* point, bool minimization) const { + double RefinementTree::getBestQ(const double* point, bool minimization, size_t* next_labels, size_t n_labels) const { auto val = std::numeric_limits::infinity(); if (!minimization) val = -val; - for (const el_t& el : _mapping) { - auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes); - auto v = _nodes[node]._predictor._q.avg(); - if (!std::isinf(v) && !std::isnan(v)) - val = minimization ? - std::min(v, val) : - std::max(v, val); + if(next_labels == nullptr) + { + for (const el_t& el : _mapping) { + auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes); + auto v = _nodes[node]._predictor._q.avg(); + if (!std::isinf(v) && !std::isnan(v)) + val = minimization ? + std::min(v, val) : + std::max(v, val); + } + } + else { + size_t j = 0; + for(size_t i = 0; i < n_labels; ++i) + { + for(;j < _mapping.size() && _mapping[j]._label < next_labels[i]; ++j) {}; + if(j >= _mapping.size()) return val; + if(_mapping[j]._label != next_labels[i]) + continue; + const auto& res = _mapping[i]; + auto node = _nodes[res._nid].get_leaf(point, res._nid, _nodes); + auto v = _nodes[node]._predictor._q.avg(); + if (!std::isinf(v) && !std::isnan(v)) + val = minimization ? + std::min(v, val) : + std::max(v, val); + } } return val; } @@ -162,7 +182,7 @@ namespace prlearn { _predictor._data[i]._hmid += point[i]; } - // update the split-filters + // update the split-filters _predictor._data[i]._splitfilter.add(_predictor._data[i]._lowq, _predictor._data[i]._highq, delta * options._indefference, diff --git a/src/RefinementTree.h b/src/RefinementTree.h index eb3794b..45924f4 100644 --- a/src/RefinementTree.h +++ b/src/RefinementTree.h @@ -1,22 +1,22 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: RefinementTree.h * Author: Peter G. 
Jensen * @@ -48,7 +48,7 @@ namespace prlearn { void print(std::ostream& s, size_t tabs, std::map& edge_map) const; - double getBestQ(const double* val, bool minimization) const; + double getBestQ(const double* val, bool minimization, size_t* next_labels = nullptr, size_t n_labels = 0) const; protected: @@ -78,7 +78,7 @@ namespace prlearn { }; struct node_t { - // we could do these two values as a single pointer + // we could do these two values as a single pointer // which dynamically allocates enough space for both split and pred_t // including space for the run-time sized arrays. // however, this is at current time of writing a premature optimization. diff --git a/src/SimpleMLearning.h b/src/SimpleMLearning.h index e2f9776..c66ada2 100644 --- a/src/SimpleMLearning.h +++ b/src/SimpleMLearning.h @@ -1,22 +1,22 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: SimpleMLearning.h * Author: Peter G. Jensen * From 757eb9c92bf1171b902b6da2a66ceb07e7217554 Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 14:27:52 +0200 Subject: [PATCH 2/9] fixing warning --- src/RefinementTree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index b5604df..b7c349f 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -62,12 +62,11 @@ namespace prlearn { } qvar_t - RefinementTree::lookup(size_t label, const double* point, size_t dimen) const { + RefinementTree::lookup(size_t label, const double* point) const { el_t lf(label); auto res = std::lower_bound(std::begin(_mapping), std::end(_mapping), lf); if (res == std::end(_mapping) || res->_label != label) return qvar_t(std::numeric_limits::quiet_NaN(), 0, 0); - assert(dimen == _dimen); auto n = _nodes[res->_nid].get_leaf(point, res->_nid, _nodes); auto& node = _nodes[n]; return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance); From eba3adee3b891b45a8a6c64056ff807b784b4c82 Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 14:47:30 +0200 Subject: [PATCH 3/9] fixing getBestQ of simple regressor type --- src/SimpleRegressor.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/SimpleRegressor.h b/src/SimpleRegressor.h index bb79bd3..239dbc2 100644 --- a/src/SimpleRegressor.h +++ b/src/SimpleRegressor.h @@ -1,21 +1,21 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: SimpleRegressor.h * Author: Peter G. Jensen * @@ -52,15 +52,24 @@ namespace prlearn { return qvar_t{std::numeric_limits::quiet_NaN(), 0, 0}; } - double getBestQ(const double*, bool minimization) const { + double getBestQ(const double*, bool minimization, size_t* next_labels = nullptr, size_t n_labels = 0) const { double res = std::numeric_limits::infinity(); if (!minimization) res = -res; + size_t j = 0; for (auto& e : _labels) + { + if(next_labels != nullptr) + { + for(;j < n_labels && next_labels[j] < e._label; ++j) {} + if(j >= n_labels) return res; + if(next_labels[j] != e._label) continue; + } if (!std::isinf(e._value.avg()) && !std::isnan(e._value.avg())) res = minimization ? std::min(res, e._value.avg()) : std::max(res, e._value.avg()); + } return res; } From 97eba9b4f303c9050e0cad62a014e733238b3de0 Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 14:55:32 +0200 Subject: [PATCH 4/9] making interfaces compatible between m and q learning --- src/MLearning.cpp | 3 ++- src/MLearning.h | 1 + src/SimpleMLearning.cpp | 12 ++++++------ src/SimpleMLearning.h | 1 + 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index 541c7cf..495d254 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -104,7 +104,8 @@ namespace prlearn { } void MLearning::addSample(size_t dimen, const double* f_var, - const double* t_var, size_t label, + const double* t_var, size_t*, size_t, + size_t label, size_t dest, double value, const std::vector& clouds, bool minimization, const double delta, const propts_t& options) { diff --git a/src/MLearning.h b/src/MLearning.h index 53c9b61..86e0cbd 100644 --- a/src/MLearning.h +++ b/src/MLearning.h @@ -45,6 +45,7 @@ namespace prlearn { void addSample(size_t dimen, // dimensions const double* f_var, const double* t_var, // doubles + size_t* next_labels, size_t n_labels, // actions in dest, ignored in m learning size_t label, // edge chosen, edge taken size_t dest, double value, // cost const std::vector& clouds, // other points diff --git a/src/SimpleMLearning.cpp b/src/SimpleMLearning.cpp index 472d799..6ce3774 100644 --- a/src/SimpleMLearning.cpp +++ b/src/SimpleMLearning.cpp @@ -1,22 +1,22 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: SimpleMLearning.cpp * Author: Peter G. 
Jensen * @@ -30,7 +30,7 @@ namespace prlearn { SimpleMLearning::~SimpleMLearning() { } - void SimpleMLearning::addSample(size_t, const double*, const double*, size_t label, size_t dest, double value, const std::vector& clouds, bool minimization, const double, const propts_t& options) { + void SimpleMLearning::addSample(size_t, const double*, const double*, size_t*, size_t, size_t label, size_t dest, double value, const std::vector& clouds, bool minimization, const double, const propts_t& options) { node_t act; act._label = label; auto lb = std::lower_bound(std::begin(_nodes), std::end(_nodes), act); @@ -63,7 +63,7 @@ namespace prlearn { for (size_t i = 0; i < tabs; ++i) s << "\t"; s << "{\"id\":" << (this - other.data()) << ","; bool first = true; - + for (auto& el : _nodes) { if (!first) s << ","; first = false; diff --git a/src/SimpleMLearning.h b/src/SimpleMLearning.h index c66ada2..cb820a9 100644 --- a/src/SimpleMLearning.h +++ b/src/SimpleMLearning.h @@ -46,6 +46,7 @@ namespace prlearn { void addSample(size_t dimen, // dimensions const double*, const double*, // doubles + size_t* next_labels, size_t n_labels, // ignored by m-learning size_t label, size_t dest, double value, // cost const std::vector& clouds, // other points From a5abe2616fd5f556c9a875061bcbfb5dcff52c5b Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 15:04:00 +0200 Subject: [PATCH 5/9] missing variable --- src/RefinementTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index b7c349f..2b49b36 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -62,7 +62,7 @@ namespace prlearn { } qvar_t - RefinementTree::lookup(size_t label, const double* point) const { + RefinementTree::lookup(size_t label, const double* point, size_t) const { el_t lf(label); auto res = std::lower_bound(std::begin(_mapping), std::end(_mapping), lf); if (res == std::end(_mapping) || res->_label != label) From d37496233cc1ab25ff0e43f0eac75cc84b95ea8c Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Wed, 18 May 2022 15:38:13 +0200 Subject: [PATCH 6/9] using wrong variable --- src/RefinementTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index 2b49b36..2d5b2dd 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -95,7 +95,7 @@ namespace prlearn { if(j >= _mapping.size()) return val; if(_mapping[j]._label != next_labels[i]) continue; - const auto& res = _mapping[i]; + const auto& res = _mapping[j]; auto node = _nodes[res._nid].get_leaf(point, res._nid, _nodes); auto v = _nodes[node]._predictor._q.avg(); if (!std::isinf(v) && !std::isnan(v)) From 1b9e4f1e88ec0e9bdffe127311939942b4fed15e Mon Sep 17 00:00:00 2001 From: "Peter G. 
Jensen" Date: Fri, 5 Aug 2022 14:19:56 +0200 Subject: [PATCH 7/9] switched to correct computation of variance --- src/MLearning.cpp | 48 ++++++++++++++-------------------------- src/RefinementTree.cpp | 6 ++--- src/SimpleMLearning.cpp | 4 ++-- src/SimpleRegressor.h | 2 +- src/structs.cpp | 49 +++++++++++------------------------------ src/structs.h | 35 ++++++++++++++++++++++------- 6 files changed, 63 insertions(+), 81 deletions(-) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index 495d254..7b7aabe 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -242,11 +242,13 @@ namespace prlearn { avg_t mean, old_mean; std::vector sample_qvar; std::vector old_var; + avg_t svar, ovar; + double fut = 0; for (auto& s : _samples) { auto best = minimize ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - double var = 0; + double squared = 0; if (s._size == 0 || s._cloud == 0 || discount == 0) { best = 0; } else { @@ -255,10 +257,10 @@ namespace prlearn { auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg(); fut = std::min(fut, c); if (c == best) - var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q._variance); + squared = std::min(squared, clouds[s._cloud]._nodes[s._nodes[i]]._q.squared()); else if ((c < best && minimize) || (c > best && !minimize)) { best = c; - var = clouds[s._cloud]._nodes[s._nodes[i]]._q._variance; + squared = clouds[s._cloud]._nodes[s._nodes[i]]._q.squared(); } } } @@ -269,14 +271,14 @@ namespace prlearn { best *= discount; // dont look too far into the future for the variance. // if we do, it will grow in horrible ways and be useless. - var *= std::min(0.5, discount); + squared *= std::min(0.5, discount); for (size_t d = 0; d < dimen; ++d) { if (s._variance) { auto v = s._variance[d]; v.first.avg() += best; v.second.avg() += best; - v.first._variance = std::max(v.first._variance, var); - v.second._variance = std::max(v.second._variance, var); + v.first.squared() = std::max(v.first.squared(), squared); + v.second.squared() = std::max(v.second.squared(), squared); tmpq[d].first.addPoints(v.first.cnt(), v.first.avg()); tmpq[d].second.addPoints(v.second.cnt(), v.second.avg()); mean.addPoints(v.first.cnt(), v.first.avg()); @@ -288,8 +290,8 @@ namespace prlearn { auto v = s._old[d]; v.first.avg() += best; v.second.avg() += best; - v.first._variance = std::max(v.first._variance, var); - v.second._variance = std::max(v.second._variance, var); + v.first.squared() = std::max(v.first.squared(), squared); + v.second.squared() = std::max(v.second.squared(), squared); old_mean.addPoints(v.first.cnt(), v.first.avg()); old_mean.addPoints(v.second.cnt(), v.second.avg()); old_var.push_back(v.first); @@ -298,44 +300,28 @@ namespace prlearn { } } - avg_t svar, ovar; + auto vars = std::make_unique < avg_t[]>(dimen * 2); bool first = true; size_t dimcnt = 0; for (auto& s : sample_qvar) { - { - const auto dif = std::abs(s.avg() - mean._avg); - const auto std = std::sqrt(s._variance); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - svar.addPoints(s.cnt(), var); - } auto id = dimcnt; - auto dmin = tmpq[id].first.avg(); if (!first) { - dmin = tmpq[dimcnt].second.avg(); id = dimen + dimcnt; } - { - const auto dif = std::abs(s.avg() - dmin); - const auto std = std::sqrt(s._variance); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - vars[id].addPoints(s.cnt(), var); - } + vars[id].addPoints(s.cnt(), s.squared()); if (!first) dimcnt = (dimcnt + 1) % dimen; first = !first; + svar.addPoints(s.cnt(), s.squared()); } - 
for (auto& s : old_var) { - const auto dif = std::abs(s.avg() - old_mean._avg); - const auto std = std::sqrt(s._variance); - auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; - ovar.addPoints(s.cnt(), var); - } + for (auto& s : old_var) + ovar.addPoints(s.cnt(), s.squared()); for (size_t i = 0; i < dimen; ++i) { - tmpq[i].first._variance = vars[i]._avg; - tmpq[i].second._variance = vars[i + dimen]._avg; + tmpq[i].first.squared() = vars[i]._avg; + tmpq[i].second.squared() = vars[i + dimen]._avg; } qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg); diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index 2d5b2dd..86e361d 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -69,7 +69,7 @@ namespace prlearn { return qvar_t(std::numeric_limits::quiet_NaN(), 0, 0); auto n = _nodes[res->_nid].get_leaf(point, res->_nid, _nodes); auto& node = _nodes[n]; - return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance); + return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q.squared()); } double RefinementTree::getBestQ(const double* point, bool minimization, size_t* next_labels, size_t n_labels) const { @@ -232,12 +232,12 @@ namespace prlearn { if (nodes[slow]._predictor._q.cnt() == 0) { nodes[slow]._predictor._q.cnt() = 1; nodes[slow]._predictor._q.avg() = oq.avg(); - nodes[slow]._predictor._q._variance = 0; + nodes[slow]._predictor._q.squared() = std::pow(oq.avg(), 2.0); } if (nodes[shigh]._predictor._q.cnt() == 0) { nodes[shigh]._predictor._q.cnt() = 1; nodes[shigh]._predictor._q.avg() = oq.avg(); - nodes[shigh]._predictor._q._variance = 0; + nodes[shigh]._predictor._q.squared() = std::pow(oq.avg(), 2.0); } } nodes[shigh]._predictor._cnt = nodes[shigh]._predictor._q.cnt(); diff --git a/src/SimpleMLearning.cpp b/src/SimpleMLearning.cpp index 6ce3774..65b1da9 100644 --- a/src/SimpleMLearning.cpp +++ b/src/SimpleMLearning.cpp @@ -110,14 +110,14 @@ namespace prlearn { for(auto& s : n._succssors) { const auto dif = std::abs(s._cost.avg() - nq._avg); - const auto std = std::sqrt(s._cost._variance); + const auto std = std::sqrt(s._cost.variance()); auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; nv.addPoints(s._cost.cnt(), var); } n._q = qvar_t(nq._avg, nq._cnt, nv._avg); if ((minimization && n._q.avg() <= rq.avg()) || (!minimization && n._q.avg() >= rq.avg())) { - if(n._q.avg() != rq.avg() || n._q._variance < rq._variance || n._q.cnt() > rq.cnt()) + if(n._q.avg() != rq.avg() || n._q.variance() < rq.variance() || n._q.cnt() > rq.cnt()) rq = n._q; } } diff --git a/src/SimpleRegressor.h b/src/SimpleRegressor.h index 239dbc2..25f2154 100644 --- a/src/SimpleRegressor.h +++ b/src/SimpleRegressor.h @@ -47,7 +47,7 @@ namespace prlearn { auto res = std::lower_bound(std::begin(_labels), std::end(_labels), lf); if (res != std::end(_labels) && res->_label == label) - return qvar_t{res->_value.avg(), (double)res->_cnt, res->_value._variance}; + return qvar_t{res->_value.avg(), (double)res->_cnt, res->_value.squared()}; else return qvar_t{std::numeric_limits::quiet_NaN(), 0, 0}; } diff --git a/src/structs.cpp b/src/structs.cpp index b4c3692..dc1d056 100644 --- a/src/structs.cpp +++ b/src/structs.cpp @@ -1,21 +1,21 @@ /* * Copyright Peter G. 
Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: structs.cpp * Author: Peter G. Jensen * @@ -44,7 +44,7 @@ namespace prlearn { void qvar_t::print(std::ostream& stream) const { stream << "["; stream << (*(avg_t*)this); - stream << ", " << _variance << "]"; + stream << ", " << variance() << "]"; } std::ostream& operator<<(std::ostream& o, const qvar_t& v) { @@ -59,29 +59,15 @@ namespace prlearn { return a; qvar_t res = a; res.addPoints(b._cnt, b._avg); - const auto adif = std::abs(res._avg - a._avg); - const auto bdif = std::abs(res._avg - b._avg); - const auto astd = std::sqrt(a._variance); - const auto bstd = std::sqrt(b._variance); - auto ca = std::pow(adif + astd, 2.0) + std::pow(adif - astd, 2.0); - auto cb = std::pow(bdif + bstd, 2.0) + std::pow(bdif - bstd, 2.0); - avg_t tmp; - tmp.addPoints(a._cnt, ca / 2.0); - tmp.addPoints(b._cnt, cb / 2.0); - res._variance = tmp._avg; + res._sq = (a._sq * (a._cnt / res._cnt)) + (b._sq * (b._cnt / res._cnt)); return res; } qvar_t& qvar_t::operator+=(double d) { assert(!std::isinf(d)); avg_t::operator+=(d); - auto nvar = std::pow(d - _avg, 2.0); - assert(!std::isinf(nvar)); - if (_cnt == 1) _variance = nvar; - else { - nvar -= _variance; - _variance += nvar / _cnt; - } + auto diff = std::pow(d, 2.0) - _sq; + _sq += diff / _cnt; return *this; } @@ -89,18 +75,9 @@ namespace prlearn { assert(weight >= 0); assert(_cnt >= 0); if (weight == 0) return; - auto oa = _avg; avg_t::addPoints(weight, d); - auto nvar = std::abs((d - oa)*(d - _avg)); - assert(!std::isinf(nvar)); - if (_cnt == weight) _variance = nvar; - else { - nvar -= _variance; - _variance += (nvar * weight) / _cnt; - } - assert(_variance >= 0); - assert(!std::isnan(_variance)); - assert(!std::isinf(_variance)); + auto diff = std::pow(d, 2.0) - _sq; + _sq += diff * (weight / _cnt); } double triangular_cdf(double mid, double width, double point) { @@ -117,10 +94,10 @@ namespace prlearn { constexpr double minvar = 0.0001; if (std::min(a.cnt(), b.cnt()) <= 1) return; - if (a._variance == b._variance && a.avg() == b.avg()) + if (a.variance() == b.variance() && a.avg() == b.avg()) return; - auto vara = std::max(minvar, a._variance); - auto varb = std::max(minvar, b._variance); + auto vara = std::max(minvar, a.variance()); + auto varb = std::max(minvar, b.variance()); double tval = std::abs(a.avg() - b.avg()) / std::sqrt(((vara * a.cnt()) + (varb * b.cnt())) / (a.cnt() * b.cnt())); diff --git a/src/structs.h b/src/structs.h index 6ca44fa..7b9d230 100644 --- a/src/structs.h +++ b/src/structs.h @@ -1,21 +1,21 @@ /* * Copyright Peter G. Jensen - * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ -/* +/* * File: structs.h * Author: Peter G. Jensen * @@ -33,6 +33,8 @@ #include #include #include +#include + namespace prlearn { struct avg_t { @@ -54,7 +56,7 @@ namespace prlearn { } else { _cnt += weight; double diff = d - _avg; - _avg += ((diff * weight) / (double) _cnt); // add only "share" of difference + _avg += diff * (weight / _cnt); // add only "share" of difference } assert(!std::isnan(_avg)); } @@ -96,15 +98,14 @@ namespace prlearn { qvar_t() = default; - qvar_t(double d, double w, double v) { + qvar_t(double d, double w, double squared) { _avg = d; _cnt = w; - _variance = v; + _sq = squared; }; // this is a dirty hijack! qvar_t& operator+=(double d); void addPoints(double weight, double d); - double _variance = 0; auto& avg() { return _avg; @@ -127,6 +128,24 @@ namespace prlearn { } void print(std::ostream& stream) const; static qvar_t approximate(const qvar_t& a, const qvar_t& b); + double variance() const { + auto pow = std::pow(_avg, 2.0); + if(pow >= _sq) + return 0; + auto var = std::sqrt(_sq - pow); + return var; + } + + double& squared() { + return _sq; + } + + const double& squared() const { + return _sq; + } + + private: + double _sq = 0; }; struct splitfilter_t { From 949204932e25eea91269717d1a471ba6f621368a Mon Sep 17 00:00:00 2001 From: "Peter G. Jensen" Date: Fri, 5 Aug 2022 14:57:04 +0200 Subject: [PATCH 8/9] mimicking old Mlearning setup --- src/MLearning.cpp | 54 ++++++++++++++++++++++++++++++----------------- src/structs.h | 11 ++++++---- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/MLearning.cpp b/src/MLearning.cpp index 7b7aabe..2173dc7 100644 --- a/src/MLearning.cpp +++ b/src/MLearning.cpp @@ -242,13 +242,11 @@ namespace prlearn { avg_t mean, old_mean; std::vector sample_qvar; std::vector old_var; - avg_t svar, ovar; - double fut = 0; for (auto& s : _samples) { auto best = minimize ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); - double squared = 0; + double var = 0; if (s._size == 0 || s._cloud == 0 || discount == 0) { best = 0; } else { @@ -257,10 +255,10 @@ namespace prlearn { auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg(); fut = std::min(fut, c); if (c == best) - squared = std::min(squared, clouds[s._cloud]._nodes[s._nodes[i]]._q.squared()); + var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q.variance()); else if ((c < best && minimize) || (c > best && !minimize)) { best = c; - squared = clouds[s._cloud]._nodes[s._nodes[i]]._q.squared(); + var = clouds[s._cloud]._nodes[s._nodes[i]]._q.variance(); } } } @@ -271,14 +269,14 @@ namespace prlearn { best *= discount; // dont look too far into the future for the variance. // if we do, it will grow in horrible ways and be useless. 
- squared *= std::min(0.5, discount); + var *= std::min(0.5, discount); for (size_t d = 0; d < dimen; ++d) { if (s._variance) { auto v = s._variance[d]; v.first.avg() += best; v.second.avg() += best; - v.first.squared() = std::max(v.first.squared(), squared); - v.second.squared() = std::max(v.second.squared(), squared); + v.first.set_variance(std::max(v.first.variance(), var)); + v.second.set_variance(std::max(v.second.variance(), var)); tmpq[d].first.addPoints(v.first.cnt(), v.first.avg()); tmpq[d].second.addPoints(v.second.cnt(), v.second.avg()); mean.addPoints(v.first.cnt(), v.first.avg()); @@ -290,8 +288,8 @@ namespace prlearn { auto v = s._old[d]; v.first.avg() += best; v.second.avg() += best; - v.first.squared() = std::max(v.first.squared(), squared); - v.second.squared() = std::max(v.second.squared(), squared); + v.first.set_variance(std::max(v.first.variance(), var)); + v.second.set_variance(std::max(v.second.variance(), var)); old_mean.addPoints(v.first.cnt(), v.first.avg()); old_mean.addPoints(v.second.cnt(), v.second.avg()); old_var.push_back(v.first); @@ -300,32 +298,50 @@ namespace prlearn { } } - + avg_t svar, ovar; auto vars = std::make_unique < avg_t[]>(dimen * 2); bool first = true; size_t dimcnt = 0; for (auto& s : sample_qvar) { + { + const auto dif = std::abs(s.avg() - mean._avg); + const auto std = std::sqrt(s.variance()); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + svar.addPoints(s.cnt(), var); + } auto id = dimcnt; + auto dmin = tmpq[id].first.avg(); if (!first) { + dmin = tmpq[dimcnt].second.avg(); id = dimen + dimcnt; } - vars[id].addPoints(s.cnt(), s.squared()); + { + const auto dif = std::abs(s.avg() - dmin); + const auto std = std::sqrt(s.variance()); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + vars[id].addPoints(s.cnt(), var); + } if (!first) dimcnt = (dimcnt + 1) % dimen; first = !first; - svar.addPoints(s.cnt(), s.squared()); } - for (auto& s : old_var) - ovar.addPoints(s.cnt(), s.squared()); + for (auto& s : old_var) { + const auto dif = std::abs(s.avg() - old_mean._avg); + const auto std = std::sqrt(s.variance()); + auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0; + ovar.addPoints(s.cnt(), var); + } for (size_t i = 0; i < dimen; ++i) { - tmpq[i].first.squared() = vars[i]._avg; - tmpq[i].second.squared() = vars[i + dimen]._avg; + tmpq[i].first.set_variance(vars[i]._avg); + tmpq[i].second.set_variance(vars[i + dimen]._avg); } - qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg); - qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), ovar._avg); + qvar_t nq(mean._avg, mean._cnt / (dimen * 2), 0); + nq.set_variance(svar._avg); + qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), 0); + oq.set_variance(ovar._avg); return std::make_pair(nq, oq); } diff --git a/src/structs.h b/src/structs.h index 7b9d230..fbbfe5f 100644 --- a/src/structs.h +++ b/src/structs.h @@ -132,14 +132,17 @@ namespace prlearn { auto pow = std::pow(_avg, 2.0); if(pow >= _sq) return 0; - auto var = std::sqrt(_sq - pow); - return var; + return _sq - pow; } - double& squared() { - return _sq; + void set_variance(double var) { + _sq = std::pow(_avg, 2.0) + var; } + double& squared() { + return _sq; + } + const double& squared() const { return _sq; } From c5861eda6d781fd4eb397759b8452bff46b1499b Mon Sep 17 00:00:00 2001 From: "Peter G. 
Jensen" Date: Sat, 7 Oct 2023 10:47:45 +0200 Subject: [PATCH 9/9] fixing constraint lookup --- src/RefinementTree.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/RefinementTree.cpp b/src/RefinementTree.cpp index 86e361d..0c8c26a 100644 --- a/src/RefinementTree.cpp +++ b/src/RefinementTree.cpp @@ -88,13 +88,12 @@ namespace prlearn { } } else { - size_t j = 0; for(size_t i = 0; i < n_labels; ++i) { + size_t j = 0; for(;j < _mapping.size() && _mapping[j]._label < next_labels[i]; ++j) {}; - if(j >= _mapping.size()) return val; - if(_mapping[j]._label != next_labels[i]) - continue; + if(j >= _mapping.size()) continue; + if(_mapping[j]._label != next_labels[i]) continue; const auto& res = _mapping[j]; auto node = _nodes[res._nid].get_leaf(point, res._nid, _nodes); auto v = _nodes[node]._predictor._q.avg();