
Merge pull request #125 from wilsonrljr/tests/ridge_regression
Tests/ridge regression
wilsonrljr authored Nov 15, 2023
2 parents 55727d4 + 9a7fa50 commit dfa0c9a
Showing 8 changed files with 125 additions and 37 deletions.
35 changes: 32 additions & 3 deletions CHANGELOG
@@ -5,7 +5,36 @@ File for tracking changes in SysIdentPy
Changes in SysIdentPy
=====================

v0.3.0
v0.3.4
------

CONTRIBUTORS
~~~~~~~~~~~~

- wilsonrljr
- dj-gauthier

CHANGES
~~~~~~~

- The update **v0.3.4** has been released with additional features, API changes and fixes.

- MAJOR: Ridge Regression Parameter Estimation:
- Now you can use ridge regression (regularized least squares) to estimate the parameters of NARMAX models (and variants).
- Ridge regression can be selected by setting `estimator="ridge_regression"`, with the regularization strength controlled by the new `ridge_param` argument (default: machine epsilon, i.e., effectively no regularization). A usage sketch is shown after this file's diff below.
- The implementation follows the uniform regularized orthogonal least squares (UROLS) approach described in S. Chen, "Local regularization assisted orthogonal least squares regression," Neurocomputing 69 (2006) 559-585.
- Thanks to dj-gauthier for the contributions to this feature.

- API Change: plotting.py code was improved. Added type hints and new options for plotting results.

- DATASET: Added buck_id.csv and buck_valid.csv dataset to SysIdentPy repository.

- DOC: Add a Multiobjective Parameter Optimization Notebook showing how to use the new AILS method

- DOC: Minor additions and grammar fixes.


v0.3.3
------

CONTRIBUTORS
@@ -26,11 +55,11 @@ CHANGES
- See the docs for a more in depth explanation of how to use AILS.
- This feature is related to Issue #101. This work is the result of an undergraduate research conducted by Gabriel Bueno Leandro under the supervision of Samir Milani Martins and Wilson Rocha Lacerda Junior.

- API Change: `regressor_code` variable was renamed as `enconding` to avoid using the same name as the method in `narmax_tool` `regressor_code` method.
- API Change: `regressor_code` variable was renamed as `encoding` to avoid using the same name as the method in `narmax_tool` `regressor_code` method.

- DATASET: Added buck_id.csv and buck_valid.csv dataset to SysIdentPy repository.

- DOC: Add a Multiobjetive Parameter Optimization Notebook showing how to use the new AILS method
- DOC: Add a Multiobjective Parameter Optimization Notebook showing how to use the new AILS method

- DOC: Minor additions and grammar fixes.

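As a quick illustration of the new estimator option described in the CHANGELOG entry above, here is a minimal sketch of fitting a FROLS model with ridge regression. It mirrors the test_ridge_regression test added in this commit; the synthetic data-generation step is an assumption for illustration only.

import numpy as np
from sysidentpy.model_structure_selection import FROLS
from sysidentpy.basis_function import Polynomial

# Synthetic NARX data (illustrative assumption; any identification dataset works)
rng = np.random.default_rng(42)
x = rng.uniform(-1, 1, size=(500, 1))
y = np.zeros((500, 1))
for k in range(2, 500):
    y[k] = 0.2 * y[k - 1] + 0.1 * y[k - 1] * x[k - 1] + 0.9 * x[k - 2]

model = FROLS(
    ylag=[1, 2],
    xlag=2,
    estimator="ridge_regression",          # new estimator option in this release
    ridge_param=np.finfo(np.float64).eps,  # default: effectively unregularized
    basis_function=Polynomial(degree=2),
)
model.fit(X=x, y=y)
yhat = model.predict(X=x, y=y)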
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -138,7 +138,7 @@ exclude_lines = [

[tool.black]
line-length = 88
target_version = ['py37', 'py38', 'py39', 'py310']
target_version = ['py37', 'py38', 'py39', 'py310', 'py311']
preview = true
exclude = '''
/(
sysidentpy/model_structure_selection/frols.py
@@ -86,10 +86,10 @@ class FROLS(Estimators, BaseMSS):
filter.
mu : float, default=0.01
The convergence coefficient (learning rate) of the filter.
eps : float
eps : float, default=np.finfo(np.float64).eps
Normalization factor of the normalized filters.
ridge_param : float
Regularization prameter used in ridge regression
ridge_param : float, default=np.finfo(np.float64).eps
Regularization parameter used in ridge regression
gama : float, default=0.2
The leakage factor of the Leaky LMS method.
weight : float, default=0.02
@@ -166,7 +166,7 @@ def __init__(
offset_covariance: float = 0.2,
mu: float = 0.01,
eps: np.float64 = np.finfo(np.float64).eps,
ridge_param: np.float64 = np.finfo(np.float64).eps, # default is machine eps
ridge_param: np.float64 = np.finfo(np.float64).eps, # default is machine eps
gama: float = 0.2,
weight: float = 0.02,
basis_function: Union[Polynomial, Fourier] = Polynomial(),
@@ -194,7 +194,7 @@ def __init__(
offset_covariance=offset_covariance,
mu=mu,
eps=eps,
ridge_param=ridge_param, # ridge regression parameter
ridge_param=ridge_param, # ridge regression parameter
gama=gama,
weight=weight,
basis_function=basis_function,
@@ -298,15 +298,16 @@ def error_reduction_ratio(self, psi, y, process_term_number):
for j in np.arange(i, dimension):
# Add `eps` in the denominator to omit division by zero if
# denominator is zero
# To implement regularized regression (ridge regression), add
# ridgePparam to psi.T @ psi. See S. Chen, Local regularization assisted
# To implement regularized regression (ridge regression), add
# ridgeParam to psi.T @ psi. See S. Chen, Local regularization assisted
# orthogonal least squares regression, Neurocomputing 69 (2006) 559–585.
# The version implemeted below uses the same regularization for every feature,
# The version implemented below uses the same regularization for every feature,
# what Chen refers to as uniform regularized orthogonal least squares (UROLS).
# Set to tiny (self.eps) when you are not regularizing. ridge_param = eps is
# the default.
tmp_err[j] = (np.dot(tmp_psi[i:, j].T, tmp_y[i:]) ** 2) / (
(np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) + self.ridge_param) * squared_y
(np.dot(tmp_psi[i:, j].T, tmp_psi[i:, j]) + self.ridge_param)
* squared_y
) + self.eps

if i == process_term_number:
@@ -329,7 +330,7 @@ def error_reduction_ratio(self, psi, y, process_term_number):
psi_orthogonal = psi[:, tmp_piv]
return err, piv, psi_orthogonal

def information_criterion(self, X_base, y):
def information_criterion(self, X, y):
"""Determine the model order.
This function uses an information criterion to determine the model size.
@@ -343,7 +344,7 @@
----------
y : array-like of shape = n_samples
Target values of the system.
X_base : array-like of shape = n_samples
X : array-like of shape = n_samples
Input system values measured by the user.
Returns
@@ -354,14 +355,12 @@
vector position + 1).
"""
if self.n_info_values is not None and self.n_info_values > X_base.shape[1]:
self.n_info_values = X_base.shape[1]
if self.n_info_values is not None and self.n_info_values > X.shape[1]:
self.n_info_values = X.shape[1]
warnings.warn(
(
"n_info_values is greater than the maximum number of all"
" regressors space considering the chosen y_lag, u_lag, and"
f" non_degree. We set as {X_base.shape[1]}"
),
"n_info_values is greater than the maximum number of all"
" regressors space considering the chosen y_lag, u_lag, and"
f" non_degree. We set as {X.shape[1]}",
stacklevel=2,
)

@@ -372,7 +371,7 @@

for i in range(0, self.n_info_values):
n_theta = i + 1
regressor_matrix = self.error_reduction_ratio(X_base, y, n_theta)[2]
regressor_matrix = self.error_reduction_ratio(X, y, n_theta)[2]

tmp_theta = getattr(self, self.estimator)(regressor_matrix, y)

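To make the regularized ERR computation in error_reduction_ratio concrete, the following standalone numpy sketch evaluates the same quantity for a single orthogonalized candidate regressor. The vectors are synthetic placeholders, not SysIdentPy internals.

import numpy as np

rng = np.random.default_rng(0)
w = rng.normal(size=200)                 # stands in for tmp_psi[i:, j]
y = rng.normal(size=200)                 # stands in for tmp_y[i:]
eps = np.finfo(np.float64).eps
ridge_param = eps                        # default: effectively no regularization

squared_y = y @ y
# Adding ridge_param to w @ w is the uniform regularization (UROLS) from
# Chen (2006): larger values shrink the ERR credited to each candidate term.
err = (w @ y) ** 2 / (((w @ w) + ridge_param) * squared_y) + eps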
2 changes: 2 additions & 0 deletions sysidentpy/model_structure_selection/tests/test_frols.py
@@ -96,6 +96,7 @@ def test_default_values():
"offset_covariance": 0.2,
"mu": 0.01,
"eps": np.finfo(np.float64).eps,
"ridge_param": np.finfo(np.float64).eps,
"gama": 0.2,
"weight": 0.02,
"model_type": "NARMAX",
@@ -115,6 +116,7 @@ def test_default_values():
model.offset_covariance,
model.mu,
model.eps,
model.ridge_param,
model.gama,
model.weight,
model.model_type,
24 changes: 24 additions & 0 deletions sysidentpy/neural_network/tests/test_narxnn.py
@@ -446,6 +446,30 @@ def forward(self, xb):
assert_almost_equal(yhat.mean(), y_test.mean(), decimal=2)


def test_raise_batch_size():
assert_raises(
ValueError, NARXNN, batch_size=0.3, basis_function=Polynomial(degree=2)
)


def test_raise_epochs():
assert_raises(ValueError, NARXNN, epochs=0.3, basis_function=Polynomial(degree=2))


def test_raise_train_percentage():
assert_raises(
ValueError, NARXNN, train_percentage=-1, basis_function=Polynomial(degree=2)
)


def test_raise_verbose():
assert_raises(TypeError, NARXNN, verbose=None, basis_function=Polynomial(degree=2))


def test_raise_device():
assert_raises(ValueError, NARXNN, device="CPU", basis_function=Polynomial(degree=2))


def test_model_predict_fourier():
basis_function = Fourier(degree=1)

24 changes: 13 additions & 11 deletions sysidentpy/parameter_estimation/estimators.py
@@ -23,13 +23,13 @@ def __init__(
offset_covariance=0.2,
mu=0.01,
eps=np.finfo(np.float64).eps,
ridge_param = np.finfo(np.float64).eps, # for regularized ridge regression
ridge_param=np.finfo(np.float64).eps, # for regularized ridge regression
gama=0.2,
weight=0.02,
basis_function=None,
):
self.eps = eps
self.ridge_param = ridge_param # for regularized ridge regression
self.ridge_param = ridge_param # for regularized ridge regression
self.mu = mu
self.offset_covariance = offset_covariance
self.max_lag = max_lag
@@ -51,7 +51,7 @@ def _validate_params(self):
"offset_covariance": self.offset_covariance,
"mu": self.mu,
"eps": self.eps,
"ridge_param": self.ridge_param, # for regularized ridge regression
"ridge_param": self.ridge_param,
"gama": self.gama,
"weight": self.weight,
}
@@ -76,10 +76,8 @@ def _validate_params(self):
def _check_linear_dependence_rows(self, psi):
if np.linalg.matrix_rank(psi) != psi.shape[1]:
warnings.warn(
(
"Psi matrix might have linearly dependent rows."
"Be careful and check your data"
),
"Psi matrix might have linearly dependent rows."
"Be careful and check your data",
stacklevel=2,
)

@@ -119,7 +117,7 @@ def least_squares(self, psi, y):
y = y[self.max_lag :, 0].reshape(-1, 1)
theta = np.linalg.lstsq(psi, y, rcond=None)[0]
return theta

def ridge_regression(self, psi, y):
"""Estimate the model parameters using the regularized least squares method
known as ridge regression. Based on the least_squares module and uses
@@ -146,19 +144,23 @@
----------
- Wikipedia entry on ridge regression
https://en.wikipedia.org/wiki/Ridge_regression
ridge_param multiplied by the identity matrix (np.eye) favors models (theta) that
have a small L2 norm. This prevents overfitting of the model.
For applications where preventing overfitting is important, see, for example,
D. J. Gauthier, E. Bollt, A. Griffith, W. A. S. Barbosa, ‘Next generation
reservoir computing,’ Nat. Commun. 12, 5564 (2021).
https://www.nature.com/articles/s41467-021-25801-2
"""
self._check_linear_dependence_rows(psi)

y = y[self.max_lag :, 0].reshape(-1, 1)
theta = (np.linalg.pinv(psi.T @ psi + self.ridge_param * np.eye(psi.shape[1])) @ psi.T @ y)
theta = (
np.linalg.pinv(psi.T @ psi + self.ridge_param * np.eye(psi.shape[1]))
@ psi.T
@ y
)
return theta

def _unbiased_estimator(self, psi, y, theta, elag, max_lag, estimator):
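The closed-form estimate computed by the new ridge_regression method can be checked against ordinary least squares with a few lines of numpy. This is a standalone sketch with synthetic psi and y; only the theta formula comes from the code above.

import numpy as np

rng = np.random.default_rng(1)
psi = rng.normal(size=(100, 5))                    # information (regressor) matrix
theta_true = np.array([[1.0], [-0.5], [0.25], [0.0], [2.0]])
y = psi @ theta_true + 0.01 * rng.normal(size=(100, 1))

ridge_param = 0.1                                  # larger -> stronger shrinkage
theta_ridge = (
    np.linalg.pinv(psi.T @ psi + ridge_param * np.eye(psi.shape[1])) @ psi.T @ y
)
theta_ols = np.linalg.lstsq(psi, y, rcond=None)[0]
# As ridge_param -> machine eps (the default), theta_ridge approaches theta_ols.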
32 changes: 32 additions & 0 deletions sysidentpy/parameter_estimation/tests/test_estimators.py
@@ -42,6 +42,38 @@ def test_least_squares():
assert_almost_equal(model.theta, theta, decimal=2)


def test_ridge_regression():
x, y, theta = create_test_data()
basis_function = Polynomial(degree=2)
model = FROLS(
n_terms=5,
extended_least_squares=False,
ylag=[1, 2],
xlag=2,
estimator="ridge_regression",
ridge_param=np.finfo(np.float64).eps,
basis_function=basis_function,
)
model.fit(X=x, y=y)
assert_almost_equal(model.theta, theta, decimal=2)


def test_raise_ridge_regression():
assert_raises(
ValueError, Estimators, ridge_param=-0.3, basis_function=Polynomial(degree=2)
)


def test_raise():
assert_raises(
ValueError, Estimators, lam="0.97", basis_function=Polynomial(degree=2)
)


def test_lam_raise():
assert_raises(ValueError, Estimators, lam=1.01, basis_function=Polynomial(degree=2))


def test_total_least_squares():
x, y, theta = create_test_data()
model = FROLS(
6 changes: 3 additions & 3 deletions sysidentpy/utils/narmax_tools.py
@@ -41,11 +41,11 @@ def regressor_code(

if basis_name == "Polynomial" and model_representation == "neural_network":
return encoding[1:]
elif basis_name == "Polynomial" and model_representation is None:
return encoding
else:
if basis_name == "Polynomial" and model_representation is None:
return encoding

return encoding


def set_weights(
*,
