Commit b766232: take pure ReHLine class

statmlben committed Aug 21, 2024
1 parent 3d7112f commit b766232
Showing 7 changed files with 508 additions and 236 deletions.
Binary file added dist/rehline-0.0.4.dev0-py3.10-linux-x86_64.egg
Binary file not shown.
1 change: 1 addition & 0 deletions doc/source/index.rst
@@ -78,5 +78,6 @@ If you use this code please star 🌟 the repository and cite the following paper
:maxdepth: 2
:hidden:

tutorials
example
benchmark
42 changes: 40 additions & 2 deletions doc/source/tutorials.rst
@@ -19,5 +19,43 @@ and ridge regularized Huber minimization (RidgeHuber).

.. image:: ./figs/tab.png

-Solving Custom ERMs
Solving PLQ ERMs
----------------

Loss
****

.. code:: python

    # name (str): name of the custom loss function
    # loss_kwargs: more keys and values for loss parameters
    loss = {'name': <loss_name>, <**loss_kwargs>}

.. list-table::

* - **SVM**
- | ``loss_name``: 'hinge' / 'svm' / 'SVM'
|
| *Example:* ``loss = {'name': 'SVM'}``
* - **Quantile Reg**
- | ``loss_name``: 'check' / 'quantile' / 'quantile regression' / 'QR'
| ``qt`` (*list*): [q1, q2, ... qK]
|
| *Example:* ``loss = {'name': 'QR', 'qt': [0.25, 0.75]}``
* - **Smooth SVM**
- | ``loss_name``: 'sSVM' / 'smooth SVM' / 'smooth hinge'
|
| *Example:* ``loss = {'name': 'sSVM'}``
* - **Huber**
- | ``loss_name``: 'huber' / 'Huber'
|
| *Example:* ``loss = {'name': 'huber'}``
* - **SVR**
- | ``loss_name``: 'SVR' / 'svr'
| ``epsilon`` (*float*): 0.1
|
| *Example:* ``loss = {'name': 'svr', 'epsilon': 0.1}``
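
Putting the table to work — a minimal sketch of building loss dictionaries exactly as specified above (the dictionaries themselves come from the table; how a PLQ ERM solver consumes them is left to that solver's docs):

.. code:: python

    # Hedged example: only the dict format is documented above.
    svm_loss = {'name': 'SVM'}                     # hinge loss
    qr_loss  = {'name': 'QR', 'qt': [0.25, 0.75]}  # quantile levels q1, q2
    svr_loss = {'name': 'svr', 'epsilon': 0.1}     # epsilon-insensitive loss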
9 changes: 5 additions & 4 deletions rehline/__init__.py
@@ -1,8 +1,9 @@
# Import from internal C++ module
-from ._base import make_fair_classification, rehu, relu
-from ._class import ReHLine, ReHLine_solver, ReHLineLinear
from ._base import ReHLine_solver, _BaseReHLine
from ._class import ReHLine
from ._data import make_fair_classification
from ._internal import rehline_internal, rehline_result

-__all__ = ("ReHLine",
-           "ReHLineLinear",
__all__ = ("_BaseReHLine",
           "ReHLine",
           "make_fair_classification")
162 changes: 122 additions & 40 deletions rehline/_base.py
@@ -5,13 +5,123 @@

# License: MIT License

from abc import abstractmethod

import numpy as np
from scipy.special import huber
-from sklearn.datasets import make_classification
-from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y

from ._internal import rehline_internal, rehline_result


class _BaseReHLine(BaseEstimator):
    r"""Base Class of ReHLine Formulation.

    .. math::

        \min_{\mathbf{\beta} \in \mathbb{R}^d} \sum_{i=1}^n \sum_{l=1}^L \text{ReLU}( u_{li} \mathbf{x}_i^\intercal \mathbf{\beta} + v_{li}) + \sum_{i=1}^n \sum_{h=1}^H {\text{ReHU}}_{\tau_{hi}}( s_{hi} \mathbf{x}_i^\intercal \mathbf{\beta} + t_{hi}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2, \\
        \text{ s.t. } \mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0},

    where :math:`\mathbf{U} = (u_{li}), \mathbf{V} = (v_{li}) \in \mathbb{R}^{L \times n}`
    and :math:`\mathbf{S} = (s_{hi}), \mathbf{T} = (t_{hi}), \mathbf{\tau} = (\tau_{hi}) \in \mathbb{R}^{H \times n}`
    are the ReLU-ReHU loss parameters, and :math:`(\mathbf{A}, \mathbf{b})` are the constraint parameters.

    Parameters
    ----------
    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    U, V : array of shape (L, n_samples), default=np.empty(shape=(0, 0))
        The parameters pertaining to the ReLU part in the loss function.

    Tau, S, T : array of shape (H, n_samples), default=np.empty(shape=(0, 0))
        The parameters pertaining to the ReHU part in the loss function.

    A : array of shape (K, n_features), default=np.empty(shape=(0, 0))
        The coefficient matrix in the linear constraint.

    b : array of shape (K, ), default=np.empty(shape=0)
        The intercept vector in the linear constraint.
    """

    def __init__(self, C=1.,
                 U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)),
                 Tau=np.empty(shape=(0, 0)),
                 S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)),
                 A=np.empty(shape=(0, 0)), b=np.empty(shape=(0))):
        self.C = C
        self.U = U
        self.V = V
        self.S = S
        self.T = T
        self.Tau = Tau
        self.A = A
        self.b = b
        self.L = U.shape[0]
        self.n = U.shape[1]
        self.H = S.shape[0]
        self.K = A.shape[0]

    def auto_shape(self):
        """
        Automatically generate the shape of the parameters of the ReHLine loss function.
        """
        self.L = self.U.shape[0]
        self.n = self.U.shape[1]
        self.H = self.S.shape[0]
        self.K = self.A.shape[0]

    def call_ReLHLoss(self, score):
        """
        Return the value of the ReHLine loss of the `score`.

        Parameters
        ----------
        score : ndarray of shape (n_samples, )
            The input score that will be evaluated through the ReHLine loss.

        Returns
        -------
        ndarray of shape (n_samples, )
            ReHLine loss of each sample, summed over the L ReLU and
            H ReHU components.
        """

        relu_input = np.zeros((self.L, self.n))
        rehu_input = np.zeros((self.H, self.n))
        # Broadcast the (L, n) / (H, n) loss parameters against the scores:
        # row l of relu_input is U[l, :] * score + V[l, :].
        if self.L > 0:
            relu_input = (self.U.T * score[:, np.newaxis]).T + self.V
        if self.H > 0:
            rehu_input = (self.S.T * score[:, np.newaxis]).T + self.T
        return np.sum(_relu(relu_input), 0) + np.sum(_rehu(rehu_input), 0)
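
    # Editor's sketch (not part of this commit): for the SVM hinge loss
    # ReLU(1 - y_i * score_i), set L = 1, U = -y.reshape(1, -1) and
    # V = np.ones((1, n)); call_ReLHLoss(score) then returns the
    # per-sample hinge losses np.maximum(1 - y * score, 0).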

    @abstractmethod
    def fit(self, X, y, sample_weight):
        """Fit model."""

    @abstractmethod
    def decision_function(self, X):
        """The decision function evaluated on the given dataset.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data matrix.

        Returns
        -------
        ndarray of shape (n_samples, )
            Returns the decision function of the samples.
        """
        # Check if fit has been called
        check_is_fitted(self)

        X = check_array(X)


-def relu(x):
def _relu(x):
    """
    Evaluation of ReLU given a vector.

@@ -31,7 +141,7 @@ def relu(x):
    return np.maximum(x, 0)


-def rehu(x, cut=1):
def _rehu(x, cut=1):
    """
    Evaluation of ReHU given a vector.
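
    # Editor's note (body collapsed in this diff): per the ReHLine paper,
    # ReHU_tau(z) = 0 for z <= 0, z**2 / 2 for 0 < z <= tau, and
    # tau * (z - tau / 2) for z > tau; `cut` plays the role of tau.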
@@ -64,39 +174,11 @@ def _check_rehu(rehu_coef, rehu_intercept, rehu_cut):
    if len(rehu_coef) > 0:
        assert (rehu_cut >= 0.0).all(), "`rehu_cut` must be non-negative!"

-def make_fair_classification(n_samples=100, n_features=5, ind_sensitive=0):
-    """
-    Generate a random binary fair classification problem.
-
-    Parameters
-    ----------
-    n_samples : int, default=100
-        The number of samples.
-    n_features : int, default=5
-        The total number of features.
-    ind_sensitive : int, default=0
-        The index of the sensitive feature.
-
-    Returns
-    -------
-    X : ndarray of shape (n_samples, n_features)
-        The generated samples.
-    y : ndarray of shape (n_samples,)
-        The +/- labels for class membership of each sample.
-    X_sen : ndarray of shape (n_samples,)
-        The centered samples of the sensitive feature.
-    """
-
-    X, y = make_classification(n_samples, n_features)
-    y = 2*y - 1
-
-    scaler = StandardScaler()
-    X = scaler.fit_transform(X)
-
-    X_sen = X[:, ind_sensitive]
-
-    return X, y, X_sen

def ReHLine_solver(X, U, V,
                   Tau=np.empty(shape=(0, 0)),
                   S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)),
                   A=np.empty(shape=(0, 0)), b=np.empty(shape=(0)),
                   max_iter=1000, tol=1e-4, shrink=1, verbose=1, trace_freq=100):
    # Thin wrapper around the internal C++ routine: allocate a result
    # struct and run the ReHLine solver in place.
    result = rehline_result()
    rehline_internal(result, X, A, b, U, V, S, T, Tau, max_iter, tol, shrink, verbose, trace_freq)
    return result
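
A minimal sketch of calling the solver directly — the toy data, the hinge-loss ``U``/``V`` encoding, and the ``beta`` attribute on the returned result object are assumptions based on the shapes documented in ``_BaseReHLine`` and on ``rehline_result`` above:

.. code:: python

    import numpy as np
    from rehline._base import ReHLine_solver

    n, d = 100, 5
    rng = np.random.default_rng(0)
    X = rng.standard_normal((n, d))
    y = np.where(rng.standard_normal(n) > 0, 1.0, -1.0)

    # SVM with C = 1: hinge loss ReLU(1 - y_i * x_i^T beta) => L = 1
    U = -y.reshape(1, -1)
    V = np.ones((1, n))

    res = ReHLine_solver(X, U, V, verbose=0)
    print(res.beta)  # fitted coefficients (assumed field name)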