Showing 26 changed files with 937 additions and 12 deletions.
@@ -0,0 +1,29 @@
/* (C) 2021 Roman Werpachowski. */
#include <random>
#include <benchmark/benchmark.h>
#include "ML/LogisticRegression.hpp"

/* Times ConjugateGradientLogisticRegression::fit() for D features and a variable sample size. */
template <unsigned int D> static void conjugate_gradient_logistic_regression(benchmark::State& state)
{
    const auto sample_size = static_cast<Eigen::Index>(state.range(0));
    const Eigen::MatrixXd X(Eigen::MatrixXd::Random(D, sample_size));
    const Eigen::VectorXd beta(Eigen::VectorXd::Random(D));
    Eigen::VectorXd y(X.transpose() * beta + 0.02 * Eigen::VectorXd::Random(sample_size));
    // Convert the noisy linear scores into +/-1 class labels.
    for (Eigen::Index i = 0; i < sample_size; ++i) {
        y[i] = y[i] > 0 ? 1 : -1;
    }
    ml::ConjugateGradientLogisticRegression lr;
    lr.set_maximum_steps(1000); // Most people will give up at that point.
    for (auto _ : state) {
        lr.fit(X, y);
    }
    state.SetComplexityN(state.range(0));
}

constexpr auto conjugate_gradient_logistic_regression_5d = conjugate_gradient_logistic_regression<5>;
constexpr auto conjugate_gradient_logistic_regression_50d = conjugate_gradient_logistic_regression<50>;
constexpr auto conjugate_gradient_logistic_regression_500d = conjugate_gradient_logistic_regression<500>;

BENCHMARK(conjugate_gradient_logistic_regression_5d)->RangeMultiplier(10)->Range(10, 1000)->Complexity();
BENCHMARK(conjugate_gradient_logistic_regression_50d)->RangeMultiplier(10)->Range(100, 10000)->Complexity();
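A hedged usage note (not part of the commit): built against Google Benchmark, the resulting binary can be restricted to one of these cases at run time with the standard --benchmark_filter flag, and the ->Complexity() registrations make the library fit and report an asymptotic complexity estimate in the value passed to SetComplexityN, here the sample size.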
@@ -0,0 +1,60 @@
"""Demo program for logistic_regression module. | ||
(C) 2021 Roman Werpachowski. | ||
""" | ||
import time | ||
import warnings | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from sklearn.linear_model import LogisticRegression | ||
|
||
from cppyml import logistic_regression | ||
|
||
def main(): | ||
print(""" | ||
*** LINEAR REGRESSION DEMO *** | ||
Times logistic regression against sklearn. | ||
""") | ||
np.random.seed(1066) | ||
n_timing_iters = 100 | ||
n = 100 | ||
d = 5 | ||
|
||
X = np.random.randn(n, d) | ||
w = np.random.randn(d) | ||
z = np.matmul(X, w) + np.random.randn(n) * 0.6 | ||
y = np.sign(z) | ||
y[y == 0] = 1 | ||
y01 = (1 + y) / 2 | ||
|
||
cglr = logistic_regression.ConjugateGradientLogisticRegression() | ||
lam = 0.2 | ||
tol = 1e-7 | ||
cglr .set_lam(lam) | ||
cglr .set_weight_absolute_tolerance(tol) | ||
cglr .set_weight_relative_tolerance(tol) | ||
|
||
t0 = time.perf_counter() | ||
for _ in range(n_timing_iters): | ||
result = cglr.fit(X, y) | ||
t1 = time.perf_counter() | ||
|
||
print("cppyml time: %g" % (t1 - t0)) | ||
print("cppyml result: %s" % result) | ||
|
||
lr = LogisticRegression(tol=tol, C=1/lam) | ||
|
||
t0 = time.perf_counter() | ||
for _ in range(n_timing_iters): | ||
lr.fit(X, y) | ||
t1 = time.perf_counter() | ||
print("sklearn.linear_model.LogisticRegression time: %g" % (t1 - t0)) | ||
print("sklearn.linear_model.LogisticRegression result: coef=%s, r2=%g" % (lr.coef_, lr.score(X, y))) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
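One hedged note on this comparison (my reading, not something the demo states): cglr.fit(X, y) above optimizes only the d weight coordinates, whereas scikit-learn's LogisticRegression also fits an intercept term by default; the C=1/lam argument already aligns the L2 penalty strengths, since C is the inverse regularization strength. A sketch of a closer like-for-like setup, assuming no intercept is wanted:

    # Hypothetical variant, not part of the commit: drop the intercept so sklearn
    # fits the same d-dimensional weight vector as cppyml.
    lr = LogisticRegression(tol=tol, C=1/lam, fit_intercept=False)
    lr.fit(X, y)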
@@ -0,0 +1,206 @@
/* (C) 2021 Roman Werpachowski. */
#include <cassert>
#include <cmath>
#include <sstream> // for std::stringstream in Result::to_string()
#include <stdexcept>
#include "LinearAlgebra.hpp"
#include "LogisticRegression.hpp"

namespace ml
{
    LogisticRegression::~LogisticRegression()
    {}

    double LogisticRegression::probability(Eigen::Ref<const Eigen::VectorXd> x, double y, Eigen::Ref<const Eigen::VectorXd> w)
    {
        assert(y == -1 || y == 1);
        // Sigmoid of the signed margin: P(y | x, w) = 1 / (1 + exp(-y * w.x)).
        return 1 / (1 + exp(-y * w.dot(x)));
    }

    double LogisticRegression::log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, const double lam)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        if (y.size() != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        if (w.size() != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        double l = 0;
        for (Eigen::Index i = 0; i < y.size(); ++i) {
            l -= log1p(exp(-y[i] * w.dot(X.col(i))));
        }
        l -= lam * w.squaredNorm() / 2;
        return l;
    }
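For reference, reading the function above off the code (this is not spelled out in the commit), the returned quantity is the L2-penalized log-likelihood of labels $y_i \in \{-1, +1\}$:

$$\ell(w) = -\sum_{i=1}^{n} \log\bigl(1 + e^{-y_i\, w^\top x_i}\bigr) - \frac{\lambda}{2}\,\lVert w \rVert^2 .$$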
    void LogisticRegression::grad_log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, double lam, Eigen::Ref<Eigen::VectorXd> g)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        if (y.size() != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        if (w.size() != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        if (w.size() != g.size()) {
            throw std::invalid_argument("Size mismatch: w.size() != g.size()");
        }
        // Gradient of the L2-penalized log-likelihood with respect to the weights w.
        g = -lam * w;
        for (Eigen::Index i = 0; i < y.size(); ++i) {
            g += probability(X.col(i), -y[i], w) * y[i] * X.col(i);
        }
    }

    void LogisticRegression::hessian_log_likelihood(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> w, double lam, Eigen::Ref<Eigen::MatrixXd> H)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        const auto n = y.size();
        if (n != X.cols()) {
            throw std::invalid_argument("Size mismatch: y.size() != X.cols()");
        }
        const auto dim = w.size();
        if (dim != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        if (dim != H.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != H.rows()");
        }
        if (dim != H.cols()) {
            throw std::invalid_argument("Size mismatch: w.size() != H.cols()");
        }
        // Hessian of the penalized log-likelihood: negative (semi-)definite by construction.
        H = -lam * Eigen::MatrixXd::Identity(dim, dim);
        for (Eigen::Index i = 0; i < n; ++i) {
            const auto x_i = X.col(i);
            const double p = probability(x_i, 1, w);
            H -= p * (1 - p) * x_i * x_i.transpose();
        }
    }
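Again reading off the code, the two routines above fill in the gradient and Hessian of that objective, with $\sigma(t) = 1/(1 + e^{-t})$ (the probability function evaluated for label $+1$):

$$\nabla \ell(w) = -\lambda w + \sum_{i=1}^{n} \sigma(-y_i\, w^\top x_i)\, y_i\, x_i, \qquad \nabla^2 \ell(w) = -\lambda I - \sum_{i=1}^{n} \sigma(w^\top x_i)\bigl(1 - \sigma(w^\top x_i)\bigr)\, x_i x_i^\top .$$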
    void LogisticRegression::Result::predict(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<Eigen::VectorXd> y) const
    {
        const auto dim = w.size();
        if (dim != X.rows()) {
            throw std::invalid_argument("Size mismatch: w.size() != X.rows()");
        }
        const auto n = X.cols();
        if (n != y.size()) {
            throw std::invalid_argument("Size mismatch: X.cols() != y.size()");
        }
        for (Eigen::Index i = 0; i < n; ++i) {
            if (w.dot(X.col(i)) > 0) {
                y[i] = 1;
            } else {
                y[i] = -1;
            }
        }
    }

    std::string LogisticRegression::Result::to_string() const
    {
        std::stringstream s;
        s << "LogisticRegressionResult(w=[" << w.transpose() << "], steps_taken=" << steps_taken << ", converged=" << converged << ")";
        return s.str();
    }

    AbstractLogisticRegression::AbstractLogisticRegression()
    {
        lam_ = 1e-3;
        weight_absolute_tolerance_ = 0;
        weight_relative_tolerance_ = 1e-8;
        maximum_steps_ = 100;
    }

    void AbstractLogisticRegression::set_lam(double lam)
    {
        if (!(lam >= 0)) {
            throw std::domain_error("Lambda must be non-negative");
        }
        lam_ = lam;
    }

    void AbstractLogisticRegression::set_weight_absolute_tolerance(double weight_absolute_tolerance)
    {
        if (!(weight_absolute_tolerance >= 0)) {
            throw std::domain_error("Absolute weight tolerance must be non-negative");
        }
        weight_absolute_tolerance_ = weight_absolute_tolerance;
    }

    void AbstractLogisticRegression::set_weight_relative_tolerance(double weight_relative_tolerance)
    {
        if (!(weight_relative_tolerance >= 0)) {
            throw std::domain_error("Relative weight tolerance must be non-negative");
        }
        weight_relative_tolerance_ = weight_relative_tolerance;
    }

    void AbstractLogisticRegression::set_maximum_steps(unsigned int maximum_steps)
    {
        maximum_steps_ = maximum_steps;
    }

    bool AbstractLogisticRegression::weights_converged(Eigen::Ref<const Eigen::VectorXd> old_weights, Eigen::Ref<const Eigen::VectorXd> new_weights) const
    {
        const double old_weights_norm = old_weights.norm();
        const double new_weights_norm = new_weights.norm();
        const double weights_diff_norm = (old_weights - new_weights).norm();
        return weights_diff_norm <= weight_absolute_tolerance_ + std::max(old_weights_norm, new_weights_norm) * weight_relative_tolerance_;
    }
    LogisticRegression::Result ConjugateGradientLogisticRegression::fit(Eigen::Ref<const Eigen::MatrixXd> X, Eigen::Ref<const Eigen::VectorXd> y) const
    {
        const auto n = y.size();
        const auto d = X.rows();
        if (!n) {
            throw std::invalid_argument("Need at least 1 example");
        }
        if (!d) {
            throw std::invalid_argument("Need at least 1 feature");
        }
        if (X.cols() != n) {
            throw std::invalid_argument("Dimension mismatch");
        }

        Eigen::VectorXd prev_w;
        Result result;
        result.converged = false;
        result.w = Eigen::VectorXd::Zero(d);
        Eigen::VectorXd g(d);
        Eigen::VectorXd prev_g;
        Eigen::MatrixXd H(d, d);
        Eigen::VectorXd update_direction(d);
        Eigen::VectorXd prev_update_direction;
        unsigned int iter = 0;
        while (iter < maximum_steps() && !result.converged) {
            prev_w = result.w;
            prev_g = g;
            prev_update_direction = update_direction;
            grad_log_likelihood(X, y, prev_w, lam(), g);
            hessian_log_likelihood(X, y, prev_w, lam(), H);
            // Start from the steepest-ascent direction; from the 2nd step onwards, mix in the previous direction.
            update_direction = g;
            if (iter) {
                assert(prev_g.size() == d);
                assert(prev_update_direction.size() == d);
                assert(prev_w.size() == d);
                const auto diff_g = g - prev_g;
                const double denom = prev_update_direction.dot(diff_g);
                if (denom != 0) {
                    const double beta = g.dot(diff_g) / denom;
                    update_direction -= beta * prev_update_direction;
                }
            }
            // Step length maximizing the local quadratic model of the log-likelihood along update_direction.
            result.w -= update_direction * g.dot(update_direction) / LinearAlgebra::xAx_symmetric(H, update_direction);
            result.converged = weights_converged(prev_w, result.w);
            ++iter;
        }
        result.steps_taken = iter;
        return result;
    }
}
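A hedged reading of the fit loop above: it appears to be a nonlinear conjugate-gradient ascent on $\ell(w)$ whose step length is the exact maximizer of the local quadratic model defined by the Hessian, with a mixing coefficient of Hestenes–Stiefel form. Writing $g_k = \nabla\ell(w_k)$, $H_k = \nabla^2\ell(w_k)$ and $d_k$ for update_direction,

$$\beta_k = \frac{g_k^\top (g_k - g_{k-1})}{d_{k-1}^\top (g_k - g_{k-1})}, \qquad d_k = g_k - \beta_k\, d_{k-1}, \qquad w_{k+1} = w_k - \frac{g_k^\top d_k}{d_k^\top H_k\, d_k}\, d_k ,$$

with $d_0 = g_0$ and the $\beta_k$ correction skipped whenever its denominator is zero.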