Showing 26 changed files with 937 additions and 12 deletions.
@@ -0,0 +1,29 @@
/* (C) 2021 Roman Werpachowski. */
#include <random>
#include <benchmark/benchmark.h>
#include "ML/LogisticRegression.hpp"

/* Times ConjugateGradientLogisticRegression::fit() for D features and a variable sample size. */
template <unsigned int D> static void conjugate_gradient_logistic_regression(benchmark::State& state)
{
    const auto sample_size = static_cast<Eigen::Index>(state.range(0));
    const Eigen::MatrixXd X(Eigen::MatrixXd::Random(D, sample_size));
    const Eigen::VectorXd beta(Eigen::VectorXd::Random(D));
    Eigen::VectorXd y(X.transpose() * beta + 0.02 * Eigen::VectorXd::Random(sample_size));
    // Convert the noisy linear scores into +/-1 class labels.
    for (Eigen::Index i = 0; i < sample_size; ++i) {
        y[i] = y[i] > 0 ? 1 : -1;
    }
    ml::ConjugateGradientLogisticRegression lr;
    lr.set_maximum_steps(1000); // Most people will give up at that point.
    for (auto _ : state) {
        lr.fit(X, y);
    }
    state.SetComplexityN(state.range(0));
}

constexpr auto conjugate_gradient_logistic_regression_5d = conjugate_gradient_logistic_regression<5>;
constexpr auto conjugate_gradient_logistic_regression_50d = conjugate_gradient_logistic_regression<50>;
constexpr auto conjugate_gradient_logistic_regression_500d = conjugate_gradient_logistic_regression<500>;

BENCHMARK(conjugate_gradient_logistic_regression_5d)->RangeMultiplier(10)->Range(10, 1000)->Complexity();
BENCHMARK(conjugate_gradient_logistic_regression_50d)->RangeMultiplier(10)->Range(100, 10000)->Complexity();
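A hedged usage note (not part of the commit): built against Google Benchmark, the resulting binary can be restricted to one of these cases at run time with the standard --benchmark_filter flag, and the ->Complexity() registrations make the library fit and report an asymptotic complexity estimate in the value passed to SetComplexityN, here the sample size.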
@@ -0,0 +1,60 @@
"""Demo program for logistic_regression module. | ||
(C) 2021 Roman Werpachowski. | ||
""" | ||
import time | ||
import warnings | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from sklearn.linear_model import LogisticRegression | ||
|
||
from cppyml import logistic_regression | ||
|
||
def main(): | ||
print(""" | ||
*** LINEAR REGRESSION DEMO *** | ||
Times logistic regression against sklearn. | ||
""") | ||
np.random.seed(1066) | ||
n_timing_iters = 100 | ||
n = 100 | ||
d = 5 | ||
|
||
X = np.random.randn(n, d) | ||
w = np.random.randn(d) | ||
z = np.matmul(X, w) + np.random.randn(n) * 0.6 | ||
y = np.sign(z) | ||
y[y == 0] = 1 | ||
y01 = (1 + y) / 2 | ||
|
||
cglr = logistic_regression.ConjugateGradientLogisticRegression() | ||
lam = 0.2 | ||
tol = 1e-7 | ||
cglr .set_lam(lam) | ||
cglr .set_weight_absolute_tolerance(tol) | ||
cglr .set_weight_relative_tolerance(tol) | ||
|
||
t0 = time.perf_counter() | ||
for _ in range(n_timing_iters): | ||
result = cglr.fit(X, y) | ||
t1 = time.perf_counter() | ||
|
||
print("cppyml time: %g" % (t1 - t0)) | ||
print("cppyml result: %s" % result) | ||
|
||
lr = LogisticRegression(tol=tol, C=1/lam) | ||
|
||
t0 = time.perf_counter() | ||
for _ in range(n_timing_iters): | ||
lr.fit(X, y) | ||
t1 = time.perf_counter() | ||
print("sklearn.linear_model.LogisticRegression time: %g" % (t1 - t0)) | ||
print("sklearn.linear_model.LogisticRegression result: coef=%s, r2=%g" % (lr.coef_, lr.score(X, y))) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
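One hedged note on this comparison (my reading, not something the demo states): cglr.fit(X, y) above optimizes only the d weight coordinates, whereas scikit-learn's LogisticRegression also fits an intercept term by default; the C=1/lam argument already aligns the L2 penalty strengths, since C is the inverse regularization strength. A sketch of a closer like-for-like setup, assuming no intercept is wanted:

    # Hypothetical variant, not part of the commit: drop the intercept so sklearn
    # fits the same d-dimensional weight vector as cppyml.
    lr = LogisticRegression(tol=tol, C=1/lam, fit_intercept=False)
    lr.fit(X, y)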
@@ -0,0 +1,206 @@
/* (C) 2021 Roman Werpachowski. */
#include <cassert>
#include <cmath>
#include <sstream> // for std::stringstream in Result::to_string()
#include <stdexcept>
#include "LinearAlgebra.hpp"
#include "LogisticRegression.hpp"

namespace ml
{
    LogisticRegression::~LogisticRegression()
    {}

    double LogisticRegression::probability(Eigen::Ref<const Eigen::VectorXd> x, double y, Eigen::Ref<const Eigen::VectorXd> w)
    {
        assert(y == -1 || y == 1);
        // Sigmoid of the signed margin: P(y | x, w) = 1 / (1 + exp(-y * w.x)).
        return 1 / (1 + exp(-y * w.dot(x)));
    }

    double LogisticRegression::log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, const double lam)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        if (y.size() != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        if (w.size() != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        double l = 0;
        for (Eigen::Index i = 0; i < y.size(); ++i) {
            l -= log1p(exp(-y[i] * w.dot(X.col(i))));
        }
        l -= lam * w.squaredNorm() / 2;
        return l;
    }
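For reference, reading the function above off the code (this is not spelled out in the commit), the returned quantity is the L2-penalized log-likelihood of labels $y_i \in \{-1, +1\}$:

$$\ell(w) = -\sum_{i=1}^{n} \log\bigl(1 + e^{-y_i\, w^\top x_i}\bigr) - \frac{\lambda}{2}\,\lVert w \rVert^2 .$$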
    void LogisticRegression::grad_log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, double lam, Eigen::Ref<Eigen::VectorXd> g)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        if (y.size() != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        if (w.size() != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        if (w.size() != g.size()) {
            throw std::invalid_argument("Size mismatch: w.size() != g.size()");
        }
        // Gradient of the L2-penalized log-likelihood with respect to the weights w.
        g = -lam * w;
        for (Eigen::Index i = 0; i < y.size(); ++i) {
            g += probability(X.col(i), -y[i], w) * y[i] * X.col(i);
        }
    }

    void LogisticRegression::hessian_log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, double lam, Eigen::Ref<Eigen::MatrixXd> H)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        const auto n = y.size();
        if (n != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        const auto dim = w.size();
        if (dim != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        if (dim != H.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != H.rows()");
        }
        if (dim != H.cols()) {
            throw std::invalid_argument("Size mismatch: w.size() != H.cols()");
        }
        // Hessian of the penalized log-likelihood: negative (semi-)definite by construction.
        H = -lam * Eigen::MatrixXd::Identity(dim, dim);
        for (Eigen::Index i = 0; i < n; ++i) {
            const auto x_i = X.col(i);
            const double p = probability(x_i, 1, w);
            H -= p * (1 - p) * x_i * x_i.transpose();
        }
    }
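Again reading off the code, the two routines above fill in the gradient and Hessian of that objective, with $\sigma(t) = 1/(1 + e^{-t})$ (the probability function evaluated for label $+1$):

$$\nabla \ell(w) = -\lambda w + \sum_{i=1}^{n} \sigma(-y_i\, w^\top x_i)\, y_i\, x_i, \qquad \nabla^2 \ell(w) = -\lambda I - \sum_{i=1}^{n} \sigma(w^\top x_i)\bigl(1 - \sigma(w^\top x_i)\bigr)\, x_i x_i^\top .$$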
    void LogisticRegression::Result::predict(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<Eigen::VectorXd> y) const
    {
        const auto dim = w.size();
        if (dim != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        const auto n = X.cols();
        if (n != y.size()) {
            throw std::invalid_argument("Size mismatch: X.cols() != y.size()");
        }
        for (Eigen::Index i = 0; i < n; ++i) {
            if (w.dot(X.col(i)) > 0) {
                y[i] = 1;
            } else {
                y[i] = -1;
            }
        }
    }

    std::string LogisticRegression::Result::to_string() const
    {
        std::stringstream s;
        s << "LogisticRegressionResult(w=[" << w.transpose() << "], steps_taken=" << steps_taken << ", converged=" << converged << ")";
        return s.str();
    }

    AbstractLogisticRegression::AbstractLogisticRegression()
    {
        lam_ = 1e-3;
        weight_absolute_tolerance_ = 0;
        weight_relative_tolerance_ = 1e-8;
        maximum_steps_ = 100;
    }

    void AbstractLogisticRegression::set_lam(double lam)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        lam_ = lam;
    }

    void AbstractLogisticRegression::set_weight_absolute_tolerance(double weight_absolute_tolerance)
    {
        if (!(weight_absolute_tolerance >= 0)) {
            throw std::domain_error("Absolute weight tolerance must be non-negative");
        }
        weight_absolute_tolerance_ = weight_absolute_tolerance;
    }

    void AbstractLogisticRegression::set_weight_relative_tolerance(double weight_relative_tolerance)
    {
        if (!(weight_relative_tolerance >= 0)) {
            throw std::domain_error("Relative weight tolerance must be non-negative");
        }
        weight_relative_tolerance_ = weight_relative_tolerance;
    }

    void AbstractLogisticRegression::set_maximum_steps(unsigned int maximum_steps)
    {
        maximum_steps_ = maximum_steps;
    }

    bool AbstractLogisticRegression::weights_converged(Eigen::Ref<const Eigen::VectorXd> old_weights, Eigen::Ref<const Eigen::VectorXd> new_weights) const
    {
        const double old_weights_norm = old_weights.norm();
        const double new_weights_norm = new_weights.norm();
        const double weights_diff_norm = (old_weights - new_weights).norm();
        return weights_diff_norm <= weight_absolute_tolerance_ + std::max(old_weights_norm, new_weights_norm) * weight_relative_tolerance_;
    }
    LogisticRegression::Result ConjugateGradientLogisticRegression::fit(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y) const
    {
        const auto n = y.size();
        const auto d = X.rows();
        if (!n) {
            throw std::invalid_argument("Need at least 1 example");
        }
        if (!d) {
            throw std::invalid_argument("Need at least 1 feature");
        }
        if (X.cols() != n) {
            throw std::invalid_argument("Dimension mismatch");
        }

        Eigen::VectorXd prev_w;
        Result result;
        result.converged = false;
        result.w = Eigen::VectorXd::Zero(d);
        Eigen::VectorXd g(d);
        Eigen::VectorXd prev_g;
        Eigen::MatrixXd H(d, d);
        Eigen::VectorXd update_direction(d);
        Eigen::VectorXd prev_update_direction;
        unsigned int iter = 0;
        while (iter < maximum_steps() && !result.converged) {
            prev_w = result.w;
            prev_g = g;
            prev_update_direction = update_direction;
            grad_log_likelihood(X, y, prev_w, lam(), g);
            hessian_log_likelihood(X, y, prev_w, lam(), H);
            // Start from the steepest-ascent direction; from the 2nd step onwards, mix in the previous direction.
            update_direction = g;
            if (iter) {
                assert(prev_g.size() == d);
                assert(prev_update_direction.size() == d);
                assert(prev_w.size() == d);
                const auto diff_g = g - prev_g;
                const double denom = prev_update_direction.dot(diff_g);
                if (denom != 0) {
                    const double beta = g.dot(diff_g) / denom;
                    update_direction -= beta * prev_update_direction;
                }
            }
            // Step length maximizing the local quadratic model of the log-likelihood along update_direction.
            result.w -= update_direction * g.dot(update_direction) / LinearAlgebra::xAx_symmetric(H, update_direction);
            result.converged = weights_converged(prev_w, result.w);
            ++iter;
        }
        result.steps_taken = iter;
        return result;
    }
}
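A hedged reading of the fit loop above: it appears to be a nonlinear conjugate-gradient ascent on $\ell(w)$ whose step length is the exact maximizer of the local quadratic model defined by the Hessian, with a mixing coefficient of Hestenes–Stiefel form. Writing $g_k = \nabla\ell(w_k)$, $H_k = \nabla^2\ell(w_k)$ and $d_k$ for update_direction,

$$\beta_k = \frac{g_k^\top (g_k - g_{k-1})}{d_{k-1}^\top (g_k - g_{k-1})}, \qquad d_k = g_k - \beta_k\, d_{k-1}, \qquad w_{k+1} = w_k - \frac{g_k^\top d_k}{d_k^\top H_k\, d_k}\, d_k ,$$

with $d_0 = g_0$ and the $\beta_k$ correction skipped whenever its denominator is zero.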