Added momentum to named_arrays.optimize.minimize_gradient_descent() #73

Merged · 8 commits · Aug 26, 2024
10 changes: 8 additions & 2 deletions docs/refs.bib
@@ -11,5 +11,11 @@ @article{Eriksson1990
   URL = {https://doi.org/10.1080/0025570X.1990.11977515},
   eprint = {https://doi.org/10.1080/0025570X.1990.11977515}
 }
 
+@article{Goh2017,
+  author = {Goh, Gabriel},
+  title = {Why Momentum Really Works},
+  journal = {Distill},
+  year = {2017},
+  url = {http://distill.pub/2017/momentum},
+  doi = {10.23915/distill.00006}
+}
2 changes: 1 addition & 1 deletion named_arrays/_scalars/scalar_named_array_functions.py
@@ -917,7 +917,7 @@ def optimize_root_newton(
         if callback is not None:
             callback(i, x, f, converged)
 
-        converged |= np.abs(f) < max_abs_error
+        converged = np.abs(f) < max_abs_error
 
         if np.all(converged):
             return x
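For context on the change from |= to = above: the in-place or keeps the convergence mask latched once an element has ever satisfied the tolerance, while plain assignment re-checks convergence from the current residual on every iteration. A minimal NumPy sketch of the difference, independent of this codebase (the variable names mirror the diff, the array values are made up for illustration):

import numpy as np

max_abs_error = 1e-6
f = np.array([0.5, 1e-9])            # current residuals for two independent solves
converged = np.array([True, False])  # element 0 had converged on an earlier iteration

latched = converged | (np.abs(f) < max_abs_error)  # [True, True]: stays converged even though |f| grew
fresh = np.abs(f) < max_abs_error                  # [False, True]: reflects only the current residual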
7 changes: 5 additions & 2 deletions named_arrays/_vectors/tests/test_vectors.py
@@ -597,8 +597,11 @@ class TestOptimizeRoot(
     @pytest.mark.parametrize(
         argnames="function,expected",
         argvalues=[
-            (lambda x: (np.square(na.value(x) - shift_horizontal) + shift_vertical).length, shift_horizontal)
-            for shift_horizontal in [20,]
+            (
+                lambda x: (np.square((na.value(x) - shift_horizontal).length) + shift_vertical) * u.ph,
+                shift_horizontal,
+            )
+            for shift_horizontal in [2,]
             for shift_vertical in [1,]
         ]
     )
10 changes: 7 additions & 3 deletions named_arrays/_vectors/vector_named_array_functions.py
@@ -450,7 +450,7 @@
         if callback is not None:
             callback(i, x, f, converged)
 
-        converged |= np.abs(f) < max_abs_error
+        converged = np.abs(f) < max_abs_error
 
         if np.all(converged):
             return x
@@ -518,6 +518,7 @@
     function: Callable[[na.AbstractVectorArray], na.AbstractScalar],
     guess: na.AbstractVectorArray,
     step_size: float | na.AbstractScalar,
+    momentum: float | na.AbstractScalar,
     gradient: None | Callable[[na.AbstractVectorArray], na.AbstractScalar],
     min_gradient: na.ScalarLike,
     max_iterations: int,
@@ -547,6 +548,7 @@
     converged = na.broadcast_to(0 * na.value(x), shape=shape).astype(bool)
 
     x = na.broadcast_to(x, shape).astype(float)
+    z = 0
 
     for i in range(max_iterations):
@@ -555,12 +557,14 @@
 
         grad = gradient(x)
 
-        converged |= np.abs(grad) < min_gradient
+        converged = np.abs(grad) < min_gradient
 
         if np.all(converged):
             return x
 
-        correction = step_size * grad
+        z = momentum * z + grad
+
+        correction = step_size * z
 
         x = x - correction
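Stripped of the named_arrays broadcasting and callback machinery, the updated loop amounts to classic gradient descent with momentum. A minimal sketch in plain NumPy of the same logic (the function name and plain-scalar inputs are hypothetical, not the library's actual worker):

import numpy as np

def minimize_with_momentum(gradient, x, step_size, momentum, min_gradient, max_iterations):
    # Gradient descent with momentum: z accumulates a decaying sum of past gradients.
    z = 0.0
    for _ in range(max_iterations):
        grad = gradient(x)
        if np.all(np.abs(grad) < min_gradient):
            return x                      # gradient is (near) zero everywhere: converged
        z = momentum * z + grad           # momentum update of the accumulator
        x = x - step_size * z             # step along the accumulated direction
    return x

# Example: minimize f(x) = (x - 2)**2, whose gradient is 2 * (x - 2).
x_min = minimize_with_momentum(
    gradient=lambda x: 2 * (x - 2),
    x=np.array(0.0),
    step_size=0.1,
    momentum=0.5,
    min_gradient=1e-8,
    max_iterations=1000,
)

Setting momentum to zero makes the correction reduce to step_size * grad, which matches the function's previous behavior.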
33 changes: 30 additions & 3 deletions named_arrays/optimize.py
@@ -142,12 +142,13 @@
     function: Callable[[InputT], OutputT],
     guess: InputT,
     step_size: None | InputT = None,
+    momentum: float | OutputT = 0,
     gradient: None | Callable[[InputT], InputT] = None,
     min_gradient: None | InputT = None,
     max_iterations: int = 1000,
     callback: None | Callable[[int, InputT, OutputT, na.AbstractArray], None] = None,
 ) -> InputT:
-    """
+    r"""
     Find the local minimum of the given function using the
     `gradient descent <https://en.wikipedia.org/wiki/Gradient_descent>`_ method.
@@ -161,7 +162,12 @@
         The learning rate for the gradient descent algorithm.
         This should have the same units as ``x / gradient(x)``.
         If :obj:`None` (the default), this takes the value
-        ``0.1 * na.unit(x / gradient(x))``.
+        ``0.01 * na.unit(x / gradient(x))``.
+    momentum
+        The momentum constant, :math:`\beta`, for the gradient descent algorithm.
+        Should be a dimensionless number between zero and one.
+        Defaults to zero, which is equivalent to vanilla gradient descent with
+        no momentum.
     gradient
         The gradient of `function`.
         If :obj:`None` (the default), the gradient is computed using
@@ -180,6 +186,26 @@
         ``x`` is the current guess, ``f`` is the current function value,
         and ``converged`` is an array storing the convergence state for every
         minimum being computed.
 
+    Notes
+    -----
+
+    This function uses the update rules described in :cite:t:`Goh2017`,
+
+    .. math::
+        :label: momentum-equation
+
+        z_{k + 1} = \beta z_k + \nabla f(x_k)
+
+    .. math::
+        :label: gradient-descent
+
+        x_{k + 1} = x_k - \alpha z_{k + 1},
+
+    where :math:`x_k` is the current guess for iteration :math:`k`,
+    :math:`f` is the objective function,
+    :math:`\alpha` is the learning rate,
+    and :math:`\beta` is the momentum constant.
     """
 
     x = guess
@@ -191,7 +217,7 @@
     unit_grad = unit_f / unit_x
 
     if step_size is None:
-        step_size = 0.1 * (unit_x / unit_grad)
+        step_size = 0.01 * (unit_x / unit_grad)
 
     if gradient is None:
         def gradient(x: float | na.AbstractScalar | na.AbstractVectorArray):
@@ -209,6 +235,7 @@
         function=function,
         guess=guess,
         step_size=step_size,
+        momentum=momentum,
         gradient=gradient,
         min_gradient=min_gradient,
         max_iterations=max_iterations,
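As a quick numerical trace of the two update rules documented in the new Notes section (the toy objective and constants below are chosen for illustration and do not appear in the PR):

# Minimize f(x) = x**2, whose gradient is 2 * x, with alpha = 0.1 and beta = 0.5.
x, z = 1.0, 0.0
alpha, beta = 0.1, 0.5
for _ in range(2):
    grad = 2 * x
    z = beta * z + grad  # iteration 1: z = 2.0;  iteration 2: z = 2.6
    x = x - alpha * z    # iteration 1: x = 0.8;  iteration 2: x = 0.54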
3 changes: 2 additions & 1 deletion named_arrays/tests/test_core.py
@@ -1469,9 +1469,10 @@
             function=function,
             guess=array,
             callback=callback,
+            momentum=0.5,
         )
 
-        assert np.allclose(na.value(result), expected)
+        assert np.allclose(result, expected * na.unit_normalized(array))
         assert out is result
 
     @pytest.mark.parametrize(
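Putting it together, a hedged usage sketch of the updated public function: the momentum keyword, its default of zero, and the 0.01 default step size come from this diff, while the plain-float guess and the simple quadratic objective are assumptions made for illustration (the library's tests pass named_arrays arrays as the guess).

import numpy as np
import named_arrays as na

def objective(x):
    # Simple quadratic with its minimum at x = 3 (illustrative, not from the PR).
    return np.square(x - 3)

x_min = na.optimize.minimize_gradient_descent(
    function=objective,
    guess=0.0,      # assumed float-like guess
    momentum=0.9,   # new in this PR; momentum=0 recovers plain gradient descent
)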