Commit b766232: take pure ReHLine class

statmlben committed Aug 21, 2024
1 parent 3d7112f commit b766232
Showing 7 changed files with 508 additions and 236 deletions.
Binary file added dist/rehline-0.0.4.dev0-py3.10-linux-x86_64.egg
Binary file not shown.
1 change: 1 addition & 0 deletions doc/source/index.rst
@@ -78,5 +78,6 @@ If you use this code please star 🌟 the repository and cite the following paper
:maxdepth: 2
:hidden:

tutorials
example
benchmark
42 changes: 40 additions & 2 deletions doc/source/tutorials.rst
@@ -19,5 +19,43 @@ and ridge regularized Huber minimization (RidgeHuber).

.. image:: ./figs/tab.png

-Solving Custom ERMs
Solving PLQ ERMs
----------------

Loss
****

.. code:: python

    # name (str): name of the custom loss function
    # loss_kwargs: more keys and values for loss parameters
    loss = {'name': <loss_name>, <**loss_kwargs>}

.. list-table::

* - **SVM**
- | ``loss_name``: 'hinge' / 'svm' / 'SVM'
|
| *Example:* ``loss = {'name': 'SVM'}``
* - **Quantile Reg**
- | ``loss_name``: 'check' / 'quantile' / 'quantile regression' / 'QR'
| ``qt`` (*list*): [q1, q2, ... qK]
|
| *Example:* ``loss = {'name': 'QR', 'qt': [0.25, 0.75]}``
* - **Smooth SVM**
- | ``loss_name``: 'sSVM' / 'smooth SVM' / 'smooth hinge'
|
| *Example:* ``loss = {'name': 'sSVM'}``
* - **Huber**
- | ``loss_name``: 'huber' / 'Huber'
|
| *Example:* ``loss = {'name': 'huber'}``
* - **SVR**
- | ``loss_name``: 'SVR' / 'svr'
| ``epsilon`` (*float*): 0.1
|
| *Example:* ``loss = {'name': 'svr', 'epsilon': 0.1}``
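
Putting the table to work — a minimal sketch of building loss dictionaries exactly as specified above (the dictionaries themselves come from the table; how a PLQ ERM solver consumes them is left to that solver's docs):

.. code:: python

    # Hedged example: only the dict format is documented above.
    svm_loss = {'name': 'SVM'}                     # hinge loss
    qr_loss  = {'name': 'QR', 'qt': [0.25, 0.75]}  # quantile levels q1, q2
    svr_loss = {'name': 'svr', 'epsilon': 0.1}     # epsilon-insensitive loss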
9 changes: 5 additions & 4 deletions rehline/__init__.py
@@ -1,8 +1,9 @@
# Import from internal C++ module
-from ._base import make_fair_classification, rehu, relu
-from ._class import ReHLine, ReHLine_solver, ReHLineLinear
from ._base import ReHLine_solver, _BaseReHLine
from ._class import ReHLine
from ._data import make_fair_classification
from ._internal import rehline_internal, rehline_result

-__all__ = ("ReHLine",
-           "ReHLineLinear",
__all__ = ("_BaseReHLine",
           "ReHLine",
           "make_fair_classification")
162 changes: 122 additions & 40 deletions rehline/_base.py
@@ -5,13 +5,123 @@

# License: MIT License

from abc import abstractmethod

import numpy as np
from scipy.special import huber
-from sklearn.datasets import make_classification
-from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

from ._internal import rehline_internal, rehline_result


class _BaseReHLine(BaseEstimator):
    r"""Base Class of ReHLine Formulation.

    .. math::

        \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2, \\
        \text{ s.t. } \mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0},

    where :math:`\mathbf{U} = (u_{li}), \mathbf{V} = (v_{li}) \in \mathbb{R}^{L \times n}`
    and :math:`\mathbf{S} = (s_{hi}), \mathbf{T} = (t_{hi}), \mathbf{\tau} = (\tau_{hi}) \in \mathbb{R}^{H \times n}`
    are the ReLU-ReHU loss parameters, and :math:`(\mathbf{A}, \mathbf{b})` are the constraint parameters.

    Parameters
    ----------
    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    U, V : array of shape (L, n_samples), default=np.empty(shape=(0, 0))
        The parameters pertaining to the ReLU part in the loss function.

    Tau, S, T : array of shape (H, n_samples), default=np.empty(shape=(0, 0))
        The parameters pertaining to the ReHU part in the loss function.

    A : array of shape (K, n_features), default=np.empty(shape=(0, 0))
        The coefficient matrix in the linear constraint.

    b : array of shape (K, ), default=np.empty(shape=0)
        The intercept vector in the linear constraint.
    """

    def __init__(self, C=1.,
                 U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)),
                 Tau=np.empty(shape=(0, 0)),
                 S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)),
                 A=np.empty(shape=(0, 0)), b=np.empty(shape=(0))):
        self.C = C
        self.U = U
        self.V = V
        self.S = S
        self.T = T
        self.Tau = Tau
        self.A = A
        self.b = b
        self.L = U.shape[0]
        self.n = U.shape[1]
        self.H = S.shape[0]
        self.K = A.shape[0]

    def auto_shape(self):
        """
        Automatically generate the shape of the parameters of the ReHLine loss function.
        """
        self.L = self.U.shape[0]
        self.n = self.U.shape[1]
        self.H = self.S.shape[0]
        self.K = self.A.shape[0]

    def call_ReLHLoss(self, score):
        """
        Return the value of the ReHLine loss of the `score`.

        Parameters
        ----------
        score : ndarray of shape (n_samples, )
            The input score that will be evaluated through the ReHLine loss.

        Returns
        -------
        ndarray of shape (n_samples, )
            ReHLine loss of each sample, summed over the L ReLU and
            H ReHU components.
        """

        relu_input = np.zeros((self.L, self.n))
        rehu_input = np.zeros((self.H, self.n))
        # Broadcast the (L, n) / (H, n) loss parameters against the scores:
        # row l of relu_input is U[l, :] * score + V[l, :].
        if self.L > 0:
            relu_input = (self.U.T * score[:, np.newaxis]).T + self.V
        if self.H > 0:
            rehu_input = (self.S.T * score[:, np.newaxis]).T + self.T
        return np.sum(_relu(relu_input), 0) + np.sum(_rehu(rehu_input), 0)
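
    # Editor's sketch (not part of this commit): for the SVM hinge loss
    # ReLU(1 - y_i * score_i), set L = 1, U = -y.reshape(1, -1) and
    # V = np.ones((1, n)); call_ReLHLoss(score) then returns the
    # per-sample hinge losses np.maximum(1 - y * score, 0).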

    @abstractmethod
    def fit(self, X, y, sample_weight):
        """Fit model."""

    @abstractmethod
    def decision_function(self, X):
        """The decision function evaluated on the given dataset.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data matrix.

        Returns
        -------
        ndarray of shape (n_samples, )
            Returns the decision function of the samples.
        """
        # Check if fit has been called
        check_is_fitted(self)

        X = check_array(X)


-def relu(x):
def _relu(x):
    """
    Evaluation of ReLU given a vector.

@@ -31,7 +141,7 @@ def relu(x):
    return np.maximum(x, 0)


-def rehu(x, cut=1):
def _rehu(x, cut=1):
    """
    Evaluation of ReHU given a vector.
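
    # Editor's note (body collapsed in this diff): per the ReHLine paper,
    # ReHU_tau(z) = 0 for z <= 0, z**2 / 2 for 0 < z <= tau, and
    # tau * (z - tau / 2) for z > tau; `cut` plays the role of tau.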
@@ -64,39 +174,11 @@ def _check_rehu(rehu_coef, rehu_intercept, rehu_cut):
    if len(rehu_coef) > 0:
        assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!"

-def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0):
-    """
-    Generate a random binary fair classification problem.
-
-    Parameters
-    ----------
-    n_samples : int, default=100
-        The number of samples.
-    n_features : int, default=5
-        The total number of features.
-    ind_sensitive : int, default=0
-        The index of the sensitive feature.
-
-    Returns
-    -------
-    X : ndarray of shape (n_samples, n_features)
-        The generated samples.
-    y : ndarray of shape (n_samples,)
-        The +/- labels for class membership of each sample.
-    X_sen : ndarray of shape (n_samples,)
-        The centered samples of the sensitive feature.
-    """
-
-    X, y = make_classification(n_samples, n_features)
-    y = 2*y - 1
-
-    scaler = StandardScaler()
-    X = scaler.fit_transform(X)
-
-    X_sen = X[:, ind_sensitive]
-
-    return X, y, X_sen

def ReHLine_solver(X, U, V,
                   Tau=np.empty(shape=(0, 0)),
                   S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)),
                   A=np.empty(shape=(0, 0)), b=np.empty(shape=(0)),
                   max_iter=1000, tol=1e-4, shrink=1, verbose=1, trace_freq=100):
    # Thin wrapper around the internal C++ routine: allocate a result
    # struct and run the ReHLine solver in place.
    result = rehline_result()
    rehline_internal(result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq)
    return result
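
A minimal sketch of calling the solver directly — the toy data, the hinge-loss ``U``/``V`` encoding, and the ``beta`` attribute on the returned result object are assumptions based on the shapes documented in ``_BaseReHLine`` and on ``rehline_result`` above:

.. code:: python

    import numpy as np
    from rehline._base import ReHLine_solver

    n, d = 100, 5
    rng = np.random.default_rng(0)
    X = rng.standard_normal((n, d))
    y = np.where(rng.standard_normal(n) > 0, 1.0, -1.0)

    # SVM with C = 1: hinge loss ReLU(1 - y_i * x_i^T beta) => L = 1
    U = -y.reshape(1, -1)
    V = np.ones((1, n))

    res = ReHLine_solver(X, U, V, verbose=0)
    print(res.beta)  # fitted coefficients (assumed field name)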