Merge pull request #10 from youqiangao/main
Add composite quantile regressor
statmlben authored Jan 12, 2025
2 parents 77f2b1a + 2dbcde7 commit 4d969ee
Showing 8 changed files with 552 additions and 9 deletions.
173 changes: 166 additions & 7 deletions doc/source/autoapi/rehline/index.rst
@@ -17,22 +17,24 @@ Overview
- ReHLine Minimization.
* - :py:obj:`plqERM_Ridge <rehline.plqERM_Ridge>`
- Empirical Risk Minimization (ERM) with a piecewise linear-quadratic (PLQ) objective and a ridge penalty.
* - :py:obj:`CQR_Ridge <rehline.CQR_Ridge>`
- Composite Quantile Regressor (CQR) with a ridge penalty.


.. list-table:: Function
:header-rows: 0
:widths: auto
:class: summarytable

-* - :py:obj:`ReHLine_solver <rehline.ReHLine_solver>`\ (X, U, V, Tau, S, T, A, b, max_iter, tol, shrink, verbose, trace_freq)
+* - :py:obj:`ReHLine_solver <rehline.ReHLine_solver>`\ (X, U, V, Tau, S, T, A, b, Lambda, Gamma, xi, max_iter, tol, shrink, verbose, trace_freq)
- \-



Classes
-------

-.. py:class:: ReHLine(C=1.0, U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)), Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), max_iter=1000, tol=0.0001, shrink=1, verbose=0, trace_freq=100)
+.. py:class:: ReHLine(C=1.0, U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)), Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), max_iter=1000, tol=0.0001, shrink=1, warm_start=0, verbose=0, trace_freq=100)
Bases: :py:obj:`rehline._base._BaseReHLine`, :py:obj:`sklearn.base.BaseEstimator`

@@ -66,13 +68,24 @@ Classes
The intercept vector in the linear constraint.

verbose : int, default=0
-Enable verbose output. Note that this setting takes advantage of a
-per-process runtime setting in liblinear that, if enabled, may not work
-properly in a multithreaded context.
+Enable verbose output.

max_iter : int, default=1000
The maximum number of iterations to be run.

tol : float, default=1e-4
The tolerance for the stopping criterion.

shrink : float, default=1
The shrinkage of dual variables for the ReHLine algorithm.

warm_start : bool, default=False
Whether to use the given dual params as an initial guess for the
optimization algorithm.

trace_freq : int, default=100
The frequency at which to print the optimization trace.

Attributes
----------
coef\_ : array-like
@@ -90,6 +103,15 @@ Classes
primal_obj\_ : array-like
The primal objective function values.

Lambda : array-like
The optimized dual variables for ReLU parts.

Gamma : array-like
The optimized dual variables for ReHU parts.

xi : array-like
The optimized dual variables for linear constraints.

Examples
--------
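The worked example that normally renders here is collapsed in this diff view. As a stand-in, the sketch below is illustrative only: it encodes an SVM-type hinge loss through the ReLU parameters ``U`` and ``V``, fits with ``fit(X)``, and then refits with ``warm_start`` enabled so the stored dual variables (``Lambda``, ``Gamma``, ``xi``) seed the second solve. The toy data and the attribute-setting style are assumptions, not taken from the collapsed example.

.. code-block:: python

    import numpy as np
    from rehline import ReHLine

    # Toy classification data (illustrative only)
    rng = np.random.default_rng(0)
    n, d, C = 200, 5, 0.5
    X = rng.standard_normal((n, d))
    y = np.sign(rng.standard_normal(n))

    # Encode the hinge loss  sum_i C * max(1 - y_i * x_i' beta, 0)  as one
    # ReLU layer: ReLU(u_i * x_i' beta + v_i) with u_i = -C * y_i, v_i = C.
    clf = ReHLine()
    clf.U = -(C * y).reshape(1, -1)   # shape (L, n) with L = 1 ReLU layer
    clf.V = C * np.ones((1, n))       # shape (L, n)
    clf.fit(X=X)
    print(clf.coef_)                  # fitted coefficients

    # Warm restart (per the warm_start description above, an assumption that
    # toggling the attribute between calls is sufficient): reuse the stored
    # dual variables Lambda, Gamma, xi as the initial guess.
    clf.warm_start = 1
    clf.fit(X=X)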

@@ -178,7 +200,7 @@ Classes



-.. py:class:: plqERM_Ridge(loss, constraint=[], C=1.0, U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)), Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), max_iter=1000, tol=0.0001, shrink=1, verbose=0, trace_freq=100)
+.. py:class:: plqERM_Ridge(loss, constraint=[], C=1.0, U=np.empty(shape=(0, 0)), V=np.empty(shape=(0, 0)), Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), max_iter=1000, tol=0.0001, shrink=1, warm_start=0, verbose=0, trace_freq=100)
Bases: :py:obj:`rehline._base._BaseReHLine`, :py:obj:`sklearn.base.BaseEstimator`
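The body of this class is collapsed in the diff, so here is a minimal, hedged usage sketch. The dict-based loss specification ``{'name': 'svm'}`` and the sklearn-style ``fit`` / ``decision_function`` calls follow the conventions of the ERM tutorial and are assumptions, not taken from this (collapsed) docstring.

.. code-block:: python

    import numpy as np
    from rehline import plqERM_Ridge

    # Toy classification data (illustrative only)
    rng = np.random.default_rng(0)
    X = rng.standard_normal((200, 5))
    y = np.sign(rng.standard_normal(200))

    # Hinge-loss ERM with a ridge penalty; the loss spec is an assumption.
    est = plqERM_Ridge(loss={'name': 'svm'}, C=1.0)
    est.fit(X, y)
    scores = est.decision_function(X)   # assumed sklearn-style scoring method
    print(est.coef_[:3], scores[:3])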

@@ -328,9 +350,146 @@



.. py:class:: CQR_Ridge(quantiles, C=1.0, max_iter=1000, tol=0.0001, shrink=1, warm_start=0, verbose=0, trace_freq=100)
Bases: :py:obj:`rehline._base._BaseReHLine`, :py:obj:`sklearn.base.BaseEstimator`

Composite Quantile Regressor (CQR) with a ridge penalty.

It fits a linear regression model that minimizes a composite quantile loss, with a single coefficient vector shared across quantile levels and one intercept per level.

.. math::

    \min_{\mathbf{\beta} \in \mathbb{R}^d, \, \mathbf{\beta}_0 \in \mathbb{R}^K} \; \sum_{k=1}^K \sum_{i=1}^n \text{PLQ}_{q_k}(y_i, \mathbf{x}_i^T \mathbf{\beta} + \beta_{0k}) + \frac{1}{2} \| \mathbf{\beta} \|_2^2.
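Here :math:`K` is the number of quantile levels :math:`q_1, \dots, q_K`, the slope vector :math:`\mathbf{\beta}` is shared across levels, and :math:`\beta_{0k}` is the intercept for level :math:`q_k`. The per-level PLQ term is presumably the standard check (pinball) loss (an assumption; this docstring does not define it):

.. math::

    \text{PLQ}_{q_k}(y, z) = \rho_{q_k}(y - z), \qquad \rho_{q_k}(u) = u \left( q_k - \mathbf{1}\{u < 0\} \right),

which is piecewise linear and therefore representable in the ReHLine PLQ framework.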
Parameters
----------
quantiles : list of float (n_quantiles,)
The quantiles to be estimated.

C : float, default=1.0
Regularization parameter. The strength of the regularization is
inversely proportional to C. Must be strictly positive.
`C` will be absorbed by the ReHLine parameters when `self.make_ReLHLoss` is conducted.

verbose : int, default=0
Enable verbose output. Note that this setting takes advantage of a
per-process runtime setting in liblinear that, if enabled, may not work
properly in a multithreaded context.

max_iter : int, default=1000
The maximum number of iterations to be run.

tol : float, default=1e-4
The tolerance for the stopping criterion.

shrink : float, default=1
The shrinkage of dual variables for the ReHLine algorithm.

warm_start : bool, default=False
Whether to use the given dual params as an initial guess for the
optimization algorithm.

trace_freq : int, default=100
The frequency at which to print the optimization trace.

Attributes
----------
coef\_ : array-like
The optimized model coefficients.

intercept\_ : array-like
The optimized model intercepts.

quantiles\_ : array-like
The quantiles to be estimated.

n_iter\_ : int
The number of iterations performed by the ReHLine solver.

opt_result\_ : object
The optimization result object.

dual_obj\_ : array-like
The dual objective function values.

primal_obj\_ : array-like
The primal objective function values.

Methods
-------
fit(X, y, sample_weight=None)
Fit the model based on the given training data.

predict(X)
The prediction for the given dataset.


Overview
========


.. list-table:: Methods
:header-rows: 0
:widths: auto
:class: summarytable

* - :py:obj:`fit <rehline.CQR_Ridge.fit>`\ (X, y, sample_weight)
- Fit the model based on the given training data.
* - :py:obj:`predict <rehline.CQR_Ridge.predict>`\ (X)
- The prediction for the given dataset.


Members
=======

.. py:method:: fit(X, y, sample_weight=None)
Fit the model based on the given training data.

Parameters
----------

X : {array-like} of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the number of features.

y : array-like of shape (n_samples,)
The target variable.

sample_weight : array-like of shape (n_samples,), default=None
Array of weights that are assigned to individual
samples. If not provided, then each sample is given unit weight.

Returns
-------
self : object
An instance of the estimator.




.. py:method:: predict(X)
The prediction for the given dataset.

Parameters
----------
X : array-like of shape (n_samples, n_features)
The data matrix.

Returns
-------
ndarray of shape (n_samples, n_quantiles)
Returns the predicted values for the samples, one column per quantile level.
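A minimal end-to-end sketch of the documented API follows. The toy data is illustrative only; the constructor arguments, ``fit`` / ``predict`` calls, and output shapes are taken from the parameter, attribute, and return descriptions above.

.. code-block:: python

    import numpy as np
    from rehline import CQR_Ridge

    # Toy linear data (illustrative only)
    rng = np.random.default_rng(0)
    X = rng.standard_normal((500, 3))
    y = X @ np.array([1.0, -2.0, 0.5]) + rng.standard_normal(500)

    # Jointly estimate three quantile levels with a shared coefficient vector
    est = CQR_Ridge(quantiles=[0.25, 0.5, 0.75], C=1.0)
    est.fit(X, y)

    print(est.coef_)        # shared coefficients
    print(est.intercept_)   # one intercept per quantile level
    print(est.quantiles_)   # the estimated quantile levels

    pred = est.predict(X)   # per the docs: shape (n_samples, n_quantiles)
    print(pred.shape)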




Functions
---------
-.. py:function:: ReHLine_solver(X, U, V, Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), max_iter=1000, tol=0.0001, shrink=1, verbose=1, trace_freq=100)
+.. py:function:: ReHLine_solver(X, U, V, Tau=np.empty(shape=(0, 0)), S=np.empty(shape=(0, 0)), T=np.empty(shape=(0, 0)), A=np.empty(shape=(0, 0)), b=np.empty(shape=0), Lambda=np.empty(shape=(0, 0)), Gamma=np.empty(shape=(0, 0)), xi=np.empty(shape=(0, 0)), max_iter=1000, tol=0.0001, shrink=1, verbose=1, trace_freq=100)
4 changes: 3 additions & 1 deletion doc/source/example.rst
@@ -9,9 +9,10 @@ Example Gallery
:name: rst-link-gallery

examples/QR.ipynb
examples/CQR.ipynb
examples/SVM.ipynb
examples/FairSVM.ipynb
-examples/RankRegression.ipynb
+examples/RankRegression.ipynb

List of Examples
----------------
@@ -20,6 +21,7 @@ List of Examples
:maxdepth: 1

examples/QR.ipynb
examples/CQR.ipynb
examples/SVM.ipynb
examples/FairSVM.ipynb
examples/RankRegression.ipynb
113 changes: 113 additions & 0 deletions doc/source/examples/CQR.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions doc/source/tutorials.rst
@@ -40,6 +40,10 @@ List of Tutorials
- | `ReHLine <./autoapi/rehline/index.html#rehline.ReHLine>`_
- | ReHLine minimization with manual parameter settings.

* - `ReHLine: Ridge Composite Quantile Regression <./examples/CQR.ipynb>`_
- | `CQR_Ridge <./autoapi/rehline/index.html#rehline.CQR_Ridge>`_
- | Composite Quantile Regression (CQR) with a ridge penalty.

* - `ReHLine: Empirical Risk Minimization <./tutorials/ReHLine_ERM.rst>`_
- | `plqERM_Ridge <./autoapi/rehline/index.html#rehline.plqERM_Ridge>`_
- | Empirical Risk Minimization (ERM) with a piecewise linear-quadratic (PLQ) objective and a ridge penalty.
1 change: 1 addition & 0 deletions doc/source/tutorials/ReHLine_ERM.rst
@@ -22,5 +22,6 @@ Example
:name: rst-link-gallery

../examples/QR.ipynb
../examples/CQR.ipynb
../examples/SVM.ipynb
../examples/FairSVM.ipynb
3 changes: 2 additions & 1 deletion rehline/__init__.py
@@ -1,14 +1,15 @@
# Import from internal C++ module
from ._base import (ReHLine_solver, _BaseReHLine,
_make_constraint_rehline_param, _make_loss_rehline_param)
-from ._class import ReHLine, plqERM_Ridge
+from ._class import ReHLine, plqERM_Ridge, CQR_Ridge
from ._data import make_fair_classification
from ._internal import rehline_internal, rehline_result

__all__ = ("ReHLine_solver",
"_BaseReHLine",
"ReHLine",
"plqERM_Ridge",
"CQR_Ridge",
"_make_loss_rehline_param",
"_make_constraint_rehline_param"
"make_fair_classification")