Skip to content

Commit

Permalink
Merge pull request #5 from DEIS-Tools/hidden_actions
Browse files Browse the repository at this point in the history
Fixing issues with hidden actions. Also improves the variance computation.
  • Loading branch information
petergjoel authored Oct 19, 2023
2 parents 65df567 + c5861ed commit 9358116
Show file tree
Hide file tree
Showing 11 changed files with 150 additions and 117 deletions.
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ project(PRLearn C CXX)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_INCLUDE_CURRENT_DIR ON)

find_package(Boost 1.54 REQUIRED COMPONENTS headers REQUIRED)
find_package(Boost 1.54 REQUIRED)

add_library(prlearn SHARED ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp)
add_library(prlearnStatic STATIC ${HEADER_FILES} MLearning.cpp SimpleMLearning.cpp RefinementTree.cpp structs.cpp)
Expand Down
43 changes: 23 additions & 20 deletions src/MLearning.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
/*
* Copyright Peter G. Jensen
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
/*
* File: MLearning.cpp
* Author: Peter G. Jensen
*
*
* Created on July 25, 2017, 9:58 AM
*/

Expand Down Expand Up @@ -104,7 +104,8 @@ namespace prlearn {
}

void MLearning::addSample(size_t dimen, const double* f_var,
const double* t_var, size_t label,
const double* t_var, size_t*, size_t,
size_t label,
size_t dest, double value, const std::vector<MLearning>& clouds,
bool minimization, const double delta,
const propts_t& options) {
Expand Down Expand Up @@ -225,7 +226,7 @@ namespace prlearn {
}
}

void MLearning::update(const std::vector<MLearning>&, bool)
void MLearning::update(const std::vector<MLearning>&, bool)
{
}

Expand Down Expand Up @@ -254,10 +255,10 @@ namespace prlearn {
auto c = clouds[s._cloud]._nodes[s._nodes[i]]._q.avg();
fut = std::min(fut, c);
if (c == best)
var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q._variance);
var = std::min(var, clouds[s._cloud]._nodes[s._nodes[i]]._q.variance());
else if ((c < best && minimize) || (c > best && !minimize)) {
best = c;
var = clouds[s._cloud]._nodes[s._nodes[i]]._q._variance;
var = clouds[s._cloud]._nodes[s._nodes[i]]._q.variance();
}
}
}
Expand All @@ -274,8 +275,8 @@ namespace prlearn {
auto v = s._variance[d];
v.first.avg() += best;
v.second.avg() += best;
v.first._variance = std::max(v.first._variance, var);
v.second._variance = std::max(v.second._variance, var);
v.first.set_variance(std::max(v.first.variance(), var));
v.second.set_variance(std::max(v.second.variance(), var));
tmpq[d].first.addPoints(v.first.cnt(), v.first.avg());
tmpq[d].second.addPoints(v.second.cnt(), v.second.avg());
mean.addPoints(v.first.cnt(), v.first.avg());
Expand All @@ -287,8 +288,8 @@ namespace prlearn {
auto v = s._old[d];
v.first.avg() += best;
v.second.avg() += best;
v.first._variance = std::max(v.first._variance, var);
v.second._variance = std::max(v.second._variance, var);
v.first.set_variance(std::max(v.first.variance(), var));
v.second.set_variance(std::max(v.second.variance(), var));
old_mean.addPoints(v.first.cnt(), v.first.avg());
old_mean.addPoints(v.second.cnt(), v.second.avg());
old_var.push_back(v.first);
Expand All @@ -304,7 +305,7 @@ namespace prlearn {
for (auto& s : sample_qvar) {
{
const auto dif = std::abs(s.avg() - mean._avg);
const auto std = std::sqrt(s._variance);
const auto std = std::sqrt(s.variance());
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
svar.addPoints(s.cnt(), var);
}
Expand All @@ -316,7 +317,7 @@ namespace prlearn {
}
{
const auto dif = std::abs(s.avg() - dmin);
const auto std = std::sqrt(s._variance);
const auto std = std::sqrt(s.variance());
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
vars[id].addPoints(s.cnt(), var);
}
Expand All @@ -327,18 +328,20 @@ namespace prlearn {

for (auto& s : old_var) {
const auto dif = std::abs(s.avg() - old_mean._avg);
const auto std = std::sqrt(s._variance);
const auto std = std::sqrt(s.variance());
auto var = (std::pow(dif + std, 2.0) + std::pow(dif - std, 2.0)) / 2.0;
ovar.addPoints(s.cnt(), var);
}

for (size_t i = 0; i < dimen; ++i) {
tmpq[i].first._variance = vars[i]._avg;
tmpq[i].second._variance = vars[i + dimen]._avg;
tmpq[i].first.set_variance(vars[i]._avg);
tmpq[i].second.set_variance(vars[i + dimen]._avg);
}

qvar_t nq(mean._avg, mean._cnt / (dimen * 2), svar._avg);
qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), ovar._avg);
qvar_t nq(mean._avg, mean._cnt / (dimen * 2), 0);
nq.set_variance(svar._avg);
qvar_t oq(old_mean._avg, old_mean._cnt / (dimen * 2), 0);
oq.set_variance(ovar._avg);
return std::make_pair(nq, oq);
}

Expand Down
11 changes: 6 additions & 5 deletions src/MLearning.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
/*
* Copyright Peter G. Jensen
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/


/*
/*
* File: MLearning.h
* Author: Peter G. Jensen
*
Expand All @@ -32,7 +32,7 @@
#include <map>
#include <limits>

namespace prlearn {
namespace prlearn {

class MLearning {
public:
Expand All @@ -45,6 +45,7 @@ namespace prlearn {

void addSample(size_t dimen, // dimensions
const double* f_var, const double* t_var, // doubles
size_t* next_labels, size_t n_labels, // actions in dest, ignored in m learning
size_t label, // edge chosen, edge taken
size_t dest, double value, // cost
const std::vector<MLearning>& clouds, // other points
Expand Down
12 changes: 7 additions & 5 deletions src/QLearning.h
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
/*
* Copyright Peter G. Jensen
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
/*
* File: QLearning.h
* Author: Peter G. Jensen
*
Expand Down Expand Up @@ -46,6 +46,7 @@ namespace prlearn {

void addSample(size_t dimen, // dimensions
const double* f_val, const double* t_val, // source, destination-states
size_t* next_labels, size_t n_labels,
size_t label, // action chosen,
size_t dest, double value, // destination, cost
const std::vector<QLearning>& clouds, // other points
Expand All @@ -67,14 +68,15 @@ namespace prlearn {
template<typename Regressor>
void QLearning<Regressor>::addSample(size_t dimen, // dimensions
const double* f_var, const double* t_var, // doubles
size_t* next_labels, size_t n_labels, // possible actions
size_t label, size_t dest, double value, // cost
const std::vector<QLearning<Regressor>>&clouds, // other points
bool minimization, const double delta, const propts_t& options) {
// The ALPHA part of Q-learning is handled inside the regressors
auto toDone = 0.0;

if (dest != 0 && options._discount != 0)
toDone = clouds[dest]._regressor.getBestQ(t_var, minimization); // 0 is a special sink-node.
toDone = clouds[dest]._regressor.getBestQ(t_var, minimization, next_labels, n_labels); // 0 is a special sink-node.
auto nval = value;
// if future is not a weird number, then add it (discounted)
if (!std::isinf(toDone) && !std::isnan(toDone)) {
Expand Down
54 changes: 36 additions & 18 deletions src/RefinementTree.cpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
/*
* Copyright Peter G. Jensen
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
/*
* File: RefinementTree.cpp
* Author: Peter G. Jensen
*
*
* Created on July 18, 2017, 5:09 PM
*/

Expand Down Expand Up @@ -67,23 +67,41 @@ namespace prlearn {
auto res = std::lower_bound(std::begin(_mapping), std::end(_mapping), lf);
if (res == std::end(_mapping) || res->_label != label)
return qvar_t(std::numeric_limits<double>::quiet_NaN(), 0, 0);
assert(dimen == _dimen);
auto n = _nodes[res->_nid].get_leaf(point, res->_nid, _nodes);
auto& node = _nodes[n];
return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q._variance);
return qvar_t(node._predictor._q.avg(), node._predictor._cnt, node._predictor._q.squared());
}

double RefinementTree::getBestQ(const double* point, bool minimization) const {
double RefinementTree::getBestQ(const double* point, bool minimization, size_t* next_labels, size_t n_labels) const {
auto val = std::numeric_limits<double>::infinity();
if (!minimization)
val = -val;
for (const el_t& el : _mapping) {
auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes);
auto v = _nodes[node]._predictor._q.avg();
if (!std::isinf(v) && !std::isnan(v))
val = minimization ?
std::min(v, val) :
std::max(v, val);
if(next_labels == nullptr)
{
for (const el_t& el : _mapping) {
auto node = _nodes[el._nid].get_leaf(point, el._nid, _nodes);
auto v = _nodes[node]._predictor._q.avg();
if (!std::isinf(v) && !std::isnan(v))
val = minimization ?
std::min(v, val) :
std::max(v, val);
}
}
else {
for(size_t i = 0; i < n_labels; ++i)
{
size_t j = 0;
for(;j < _mapping.size() && _mapping[j]._label < next_labels[i]; ++j) {};
if(j >= _mapping.size()) continue;
if(_mapping[j]._label != next_labels[i]) continue;
const auto& res = _mapping[j];
auto node = _nodes[res._nid].get_leaf(point, res._nid, _nodes);
auto v = _nodes[node]._predictor._q.avg();
if (!std::isinf(v) && !std::isnan(v))
val = minimization ?
std::min(v, val) :
std::max(v, val);
}
}
return val;
}
Expand Down Expand Up @@ -162,7 +180,7 @@ namespace prlearn {
_predictor._data[i]._hmid += point[i];
}

// update the split-filters
// update the split-filters
_predictor._data[i]._splitfilter.add(_predictor._data[i]._lowq,
_predictor._data[i]._highq,
delta * options._indefference,
Expand Down Expand Up @@ -213,12 +231,12 @@ namespace prlearn {
if (nodes[slow]._predictor._q.cnt() == 0) {
nodes[slow]._predictor._q.cnt() = 1;
nodes[slow]._predictor._q.avg() = oq.avg();
nodes[slow]._predictor._q._variance = 0;
nodes[slow]._predictor._q.squared() = std::pow(oq.avg(), 2.0);
}
if (nodes[shigh]._predictor._q.cnt() == 0) {
nodes[shigh]._predictor._q.cnt() = 1;
nodes[shigh]._predictor._q.avg() = oq.avg();
nodes[shigh]._predictor._q._variance = 0;
nodes[shigh]._predictor._q.squared() = std::pow(oq.avg(), 2.0);
}
}
nodes[shigh]._predictor._cnt = nodes[shigh]._predictor._q.cnt();
Expand Down
12 changes: 6 additions & 6 deletions src/RefinementTree.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
/*
* Copyright Peter G. Jensen
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/


/*
/*
* File: RefinementTree.h
* Author: Peter G. Jensen
*
Expand Down Expand Up @@ -48,7 +48,7 @@ namespace prlearn {

void print(std::ostream& s, size_t tabs, std::map<size_t, size_t>& edge_map) const;

double getBestQ(const double* val, bool minimization) const;
double getBestQ(const double* val, bool minimization, size_t* next_labels = nullptr, size_t n_labels = 0) const;

protected:

Expand Down Expand Up @@ -78,7 +78,7 @@ namespace prlearn {
};

struct node_t {
// we could do these two values as a single pointer
// we could do these two values as a single pointer
// which dynamically allocates enough space for both split and pred_t
// including space for the run-time sized arrays.
// however, this is at current time of writing a premature optimization.
Expand Down
Loading

0 comments on commit 9358116

Please sign in to comment.