
Merge pull request #125 from wilsonrljr/tests/ridge_regression
Tests/ridge regression
wilsonrljr authored Nov 15, 2023
2 parents 55727d4 + 9a7fa50 commit dfa0c9a
Showing 8 changed files with 125 additions and 37 deletions.
35 changes: 32 additions & 3 deletions CHANGELOG
@@ -5,7 +5,36 @@ File for tracking changes in SysIdentPy
Changes in SysIdentPy
=====================

v0.3.0
v0.3.4
------

CONTRIBUTORS
~~~~~~~~~~~~

- wilsonrljr
- dj-gauthier

CHANGES
~~~~~~~

- The update **v0.3.4** has been released with additional features, API changes and fixes.

- MAJOR: Ridge Regression Parameter Estimation:
- Now you can use ridge regression (regularized least squares) to estimate the parameters of NARMAX models (and variants).
- Ridge regression can be selected by setting `estimator="ridge_regression"`, with the regularization strength controlled by the new `ridge_param` argument (default: machine epsilon, i.e., effectively no regularization). A usage sketch is shown after this file's diff below.
- The implementation follows the uniform regularized orthogonal least squares (UROLS) approach described in S. Chen, "Local regularization assisted orthogonal least squares regression," Neurocomputing 69 (2006) 559-585.
- Thanks to dj-gauthier for the contributions to this feature.

- API Change: plotting.py code was improved. Added type hints and new options for plotting results.

- DATASET: Added buck_id.csv and buck_valid.csv dataset to SysIdentPy repository.

- DOC: Add a Multiobjective Parameter Optimization Notebook showing how to use the new AILS method

- DOC: Minor additions and grammar fixes.


v0.3.3
------

CONTRIBUTORS
@@ -26,11 +55,11 @@ CHANGES
- See the docs for a more in depth explanation of how to use AILS.
- This feature is related to Issue #101. This work is the result of an undergraduate research conducted by Gabriel Bueno Leandro under the supervision of Samir Milani Martins and Wilson Rocha Lacerda Junior.

- API Change: `regressor_code` variable was renamed as `enconding` to avoid using the same name as the method in `narmax_tool` `regressor_code` method.
- API Change: `regressor_code` variable was renamed as `encoding` to avoid using the same name as the method in `narmax_tool` `regressor_code` method.

- DATASET: Added buck_id.csv and buck_valid.csv dataset to SysIdentPy repository.

- DOC: Add a Multiobjetive Parameter Optimization Notebook showing how to use the new AILS method
- DOC: Add a Multiobjective Parameter Optimization Notebook showing how to use the new AILS method

- DOC: Minor additions and grammar fixes.

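As a quick illustration of the new estimator option described in the CHANGELOG entry above, here is a minimal sketch of fitting a FROLS model with ridge regression. It mirrors the test_ridge_regression test added in this commit; the synthetic data-generation step is an assumption for illustration only.

import numpy as np
from sysidentpy.model_structure_selection import FROLS
from sysidentpy.basis_function import Polynomial

# Synthetic NARX data (illustrative assumption; any identification dataset works)
rng = np.random.default_rng(42)
x = rng.uniform(-1, 1, size=(500, 1))
y = np.zeros((500, 1))
for k in range(2, 500):
    y[k] = 0.2 * y[k - 1] + 0.1 * y[k - 1] * x[k - 1] + 0.9 * x[k - 2]

model = FROLS(
    ylag=[1, 2],
    xlag=2,
    estimator="ridge_regression",          # new estimator option in this release
    ridge_param=np.finfo(np.float64).eps,  # default: effectively unregularized
    basis_function=Polynomial(degree=2),
)
model.fit(X=x, y=y)
yhat = model.predict(X=x, y=y)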
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -138,7 +138,7 @@ exclude_lines = [

[tool.black]
line-length = 88
target_version = ['py37', 'py38', 'py39', 'py310']
target_version = ['py37', 'py38', 'py39', 'py310', 'py311']
preview = true
exclude = '''
/(
sysidentpy/model_structure_selection/frols.py
@@ -86,10 +86,10 @@ class FROLS(Estimators, BaseMSS):
filter.
mu : float, default=0.01
The convergence coefficient (learning rate) of the filter.
eps : float
eps : float, default=np.finfo(np.float64).eps
Normalization factor of the normalized filters.
ridge_param : float
Regularization prameter used in ridge regression
ridge_param : float, default=np.finfo(np.float64).eps
Regularization parameter used in ridge regression
gama : float, default=0.2
The leakage factor of the Leaky LMS method.
weight : float, default=0.02
@@ -166,7 +166,7 @@ def __init__(
offset_covariance: float = 0.2,
mu: float = 0.01,
eps: np.float64 = np.finfo(np.float64).eps,
ridge_param: np.float64 = np.finfo(np.float64).eps, # default is machine eps
ridge_param: np.float64 = np.finfo(np.float64).eps, # default is machine eps
gama: float = 0.2,
weight: float = 0.02,
basis_function: Union[Polynomial, Fourier] = Polynomial(),
@@ -194,7 +194,7 @@ def __init__(
offset_covariance=offset_covariance,
mu=mu,
eps=eps,
ridge_param=ridge_param, # ridge regression parameter
ridge_param=ridge_param, # ridge regression parameter
gama=gama,
weight=weight,
basis_function=basis_function,
@@ -298,15 +298,16 @@ def error_reduction_ratio(self, psi, y, process_term_number):
for j in np.arange(i, dimension):
# Add `eps` in the denominator to omit division by zero if
# denominator is zero
# To implement regularized regression (ridge regression), add
# ridgePparam to psi.T @ psi. See S. Chen, Local regularization assisted
# To implement regularized regression (ridge regression), add
# ridgeParam to psi.T @ psi. See S. Chen, Local regularization assisted
# orthogonal least squares regression, Neurocomputing 69 (2006) 559–585.
# The version implemeted below uses the same regularization for every feature,
# The version implemented below uses the same regularization for every feature,
# what Chen refers to as uniform regularized orthogonal least squares (UROLS).
# Set to tiny (self.eps) when you are not regularizing. ridge_param = eps is
# the default.
tmp_err[j] = (np.dot(tmp_psi[i:, j].T, tmp_y[i:]) ** 2) / (
(np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) + self.ridge_param) * squared_y
(np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) + self.ridge_param)
* squared_y
) + self.eps

if i == process_term_number:
@@ -329,7 +330,7 @@ def error_reduction_ratio(self, psi, y, process_term_number):
psi_orthogonal = psi[:, tmp_piv]
return err, piv, psi_orthogonal

def information_criterion(self, X_base, y):
def information_criterion(self, X, y):
"""Determine the model order.
This function uses an information criterion to determine the model size.
@@ -343,7 +344,7 @@
----------
y : array-like of shape = n_samples
Target values of the system.
X_base : array-like of shape = n_samples
X : array-like of shape = n_samples
Input system values measured by the user.
Returns
@@ -354,14 +355,12 @@
vector position + 1).
"""
if self.n_info_values is not None and self.n_info_values > X_base.shape[1]:
self.n_info_values = X_base.shape[1]
if self.n_info_values is not None and self.n_info_values > X.shape[1]:
self.n_info_values = X.shape[1]
warnings.warn(
(
"n_info_values is greater than the maximum number of all"
" regressors space considering the chosen y_lag, u_lag, and"
f" non_degree. We set as {X_base.shape[1]}"
),
"n_info_values is greater than the maximum number of all"
" regressors space considering the chosen y_lag, u_lag, and"
f" non_degree. We set as {X.shape[1]}",
stacklevel=2,
)

@@ -372,7 +371,7 @@

for i in range(0, self.n_info_values):
n_theta = i + 1
regressor_matrix = self.error_reduction_ratio(X_base, y, n_theta)[2]
regressor_matrix = self.error_reduction_ratio(X, y, n_theta)[2]

tmp_theta = getattr(self, self.estimator)(regressor_matrix, y)

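To make the regularized ERR computation in error_reduction_ratio concrete, the following standalone numpy sketch evaluates the same quantity for a single orthogonalized candidate regressor. The vectors are synthetic placeholders, not SysIdentPy internals.

import numpy as np

rng = np.random.default_rng(0)
w = rng.normal(size=200)                 # stands in for tmp_psi[i:, j]
y = rng.normal(size=200)                 # stands in for tmp_y[i:]
eps = np.finfo(np.float64).eps
ridge_param = eps                        # default: effectively no regularization

squared_y = y @ y
# Adding ridge_param to w @ w is the uniform regularization (UROLS) from
# Chen (2006): larger values shrink the ERR credited to each candidate term.
err = (w @ y) ** 2 / (((w @ w) + ridge_param) * squared_y) + eps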
2 changes: 2 additions & 0 deletions sysidentpy/model_structure_selection/tests/test_frols.py
@@ -96,6 +96,7 @@ def test_default_values():
"offset_covariance": 0.2,
"mu": 0.01,
"eps": np.finfo(np.float64).eps,
"ridge_param": np.finfo(np.float64).eps,
"gama": 0.2,
"weight": 0.02,
"model_type": "NARMAX",
@@ -115,6 +116,7 @@ def test_default_values():
model.offset_covariance,
model.mu,
model.eps,
model.ridge_param,
model.gama,
model.weight,
model.model_type,
24 changes: 24 additions & 0 deletions sysidentpy/neural_network/tests/test_narxnn.py
@@ -446,6 +446,30 @@ def forward(self, xb):
assert_almost_equal(yhat.mean(), y_test.mean(), decimal=2)


def test_raise_batch_size():
assert_raises(
ValueError, NARXNN, batch_size=0.3, basis_function=Polynomial(degree=2)
)


def test_raise_epochs():
assert_raises(ValueError, NARXNN, epochs=0.3, basis_function=Polynomial(degree=2))


def test_raise_train_percentage():
assert_raises(
ValueError, NARXNN, train_percentage=-1, basis_function=Polynomial(degree=2)
)


def test_raise_verbose():
assert_raises(TypeError, NARXNN, verbose=None, basis_function=Polynomial(degree=2))


def test_raise_device():
assert_raises(ValueError, NARXNN, device="CPU", basis_function=Polynomial(degree=2))


def test_model_predict_fourier():
basis_function = Fourier(degree=1)

24 changes: 13 additions & 11 deletions sysidentpy/parameter_estimation/estimators.py
@@ -23,13 +23,13 @@ def __init__(
offset_covariance=0.2,
mu=0.01,
eps=np.finfo(np.float64).eps,
ridge_param = np.finfo(np.float64).eps, # for regularized ridge regression
ridge_param=np.finfo(np.float64).eps, # for regularized ridge regression
gama=0.2,
weight=0.02,
basis_function=None,
):
self.eps = eps
self.ridge_param = ridge_param # for regularized ridge regression
self.ridge_param = ridge_param # for regularized ridge regression
self.mu = mu
self.offset_covariance = offset_covariance
self.max_lag = max_lag
@@ -51,7 +51,7 @@ def _validate_params(self):
"offset_covariance": self.offset_covariance,
"mu": self.mu,
"eps": self.eps,
"ridge_param": self.ridge_param, # for regularized ridge regression
"ridge_param": self.ridge_param,
"gama": self.gama,
"weight": self.weight,
}
@@ -76,10 +76,8 @@ def _validate_params(self):
def _check_linear_dependence_rows(self, psi):
if np.linalg.matrix_rank(psi) != psi.shape[1]:
warnings.warn(
(
"Psi matrix might have linearly dependent rows."
"Be careful and check your data"
),
"Psi matrix might have linearly dependent rows."
"Be careful and check your data",
stacklevel=2,
)

@@ -119,7 +117,7 @@ def least_squares(self, psi, y):
y = y[self.max_lag :, 0].reshape(-1, 1)
theta = np.linalg.lstsq(psi, y, rcond=None)[0]
return theta

def ridge_regression(self, psi, y):
"""Estimate the model parameters using the regularized least squares method
known as ridge regression. Based on the least_squares module and uses
@@ -146,19 +144,23 @@
----------
- Wikipedia entry on ridge regression
https://en.wikipedia.org/wiki/Ridge_regression
ridge_param multiplied by the identity matrix (np.eye) favors models (theta) that
have a small L2 norm. This prevents overfitting of the model.
For applications where preventing overfitting is important, see, for example,
D. J. Gauthier, E. Bollt, A. Griffith, W. A. S. Barbosa, ‘Next generation
reservoir computing,’ Nat. Commun. 12, 5564 (2021).
https://www.nature.com/articles/s41467-021-25801-2
"""
self._check_linear_dependence_rows(psi)

y = y[self.max_lag :, 0].reshape(-1, 1)
theta = (np.linalg.pinv(psi.T @ psi + self.ridge_param * np.eye(psi.shape[1])) @ psi.T @ y)
theta = (
np.linalg.pinv(psi.T @ psi + self.ridge_param * np.eye(psi.shape[1]))
@ psi.T
@ y
)
return theta

def _unbiased_estimator(self, psi, y, theta, elag, max_lag, estimator):
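The closed-form estimate computed by the new ridge_regression method can be checked against ordinary least squares with a few lines of numpy. This is a standalone sketch with synthetic psi and y; only the theta formula comes from the code above.

import numpy as np

rng = np.random.default_rng(1)
psi = rng.normal(size=(100, 5))                    # information (regressor) matrix
theta_true = np.array([[1.0], [-0.5], [0.25], [0.0], [2.0]])
y = psi @ theta_true + 0.01 * rng.normal(size=(100, 1))

ridge_param = 0.1                                  # larger -> stronger shrinkage
theta_ridge = (
    np.linalg.pinv(psi.T @ psi + ridge_param * np.eye(psi.shape[1])) @ psi.T @ y
)
theta_ols = np.linalg.lstsq(psi, y, rcond=None)[0]
# As ridge_param -> machine eps (the default), theta_ridge approaches theta_ols.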
32 changes: 32 additions & 0 deletions sysidentpy/parameter_estimation/tests/test_estimators.py
@@ -42,6 +42,38 @@ def test_least_squares():
assert_almost_equal(model.theta, theta, decimal=2)


def test_ridge_regression():
x, y, theta = create_test_data()
basis_function = Polynomial(degree=2)
model = FROLS(
n_terms=5,
extended_least_squares=False,
ylag=[1, 2],
xlag=2,
estimator="ridge_regression",
ridge_param=np.finfo(np.float64).eps,
basis_function=basis_function,
)
model.fit(X=x, y=y)
assert_almost_equal(model.theta, theta, decimal=2)


def test_raise_ridge_regression():
assert_raises(
ValueError, Estimators, ridge_param=-0.3, basis_function=Polynomial(degree=2)
)


def test_raise():
assert_raises(
ValueError, Estimators, lam="0.97", basis_function=Polynomial(degree=2)
)


def test_lam_raise():
assert_raises(ValueError, Estimators, lam=1.01, basis_function=Polynomial(degree=2))


def test_total_least_squares():
x, y, theta = create_test_data()
model = FROLS(
6 changes: 3 additions & 3 deletions sysidentpy/utils/narmax_tools.py
@@ -41,11 +41,11 @@ def regressor_code(

if basis_name == "Polynomial" and model_representation == "neural_network":
return encoding[1:]
elif basis_name == "Polynomial" and model_representation is None:
return encoding
else:
if basis_name == "Polynomial" and model_representation is None:
return encoding

return encoding


def set_weights(
*,
