diff --git a/docs/changelogs/v3.3.1.md b/docs/changelogs/v3.3.1.md
index 805d0ebd..af2c0100 100644
--- a/docs/changelogs/v3.3.1.md
+++ b/docs/changelogs/v3.3.1.md
@@ -8,7 +8,8 @@
 * [SGD-like Memory, AdamW-level Performance](https://arxiv.org/abs/2412.05270)
 * Rename the `Apollo` (`An Adaptive Parameter-wise Diagonal Quasi-Newton Method for Nonconvex Stochastic Optimization`) optimizer name to `ApolloDQN` not to overlap with the new optimizer name `APOLLO`. (#312)
 * Implement `MARS` optimizer. (#313, #314)
-    * [Unleashing the Power of Variance Reduction for Training Large Models](https://arxiv.org/abs/2411.10438)
+    * [Unleashing the Power of Variance Reduction for Training Large Models](https://arxiv.org/abs/2411.10438)
+* Support `Cautious` variant to `MARS` optimizer. (#314)
 
 ### Bug
 
@@ -17,7 +18,7 @@
 
 ### Docs
 
-* Add more visualizations. (#310)
+* Add more visualizations. (#310, #314)
 
 ### Contributions
 
diff --git a/pyproject.toml b/pyproject.toml
index 5a50899a..3aac1a48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pytorch_optimizer"
-version = "3.3.0"
+version = "3.3.1"
 description = "optimizer & lr scheduler & objective function collections in PyTorch"
 license = "Apache-2.0"
 authors = ["kozistr "]
diff --git a/tests/constants.py b/tests/constants.py
index 0bc6673c..07a34bcd 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -537,6 +537,7 @@
     (MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'adamw'}, 5),
     (MARS, {'lr': 1e-1, 'weight_decay': 1e-3, 'mars_type': 'lion', 'optimize_1d': True}, 5),
     (MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'shampoo'}, 5),
+    (MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'adamw', 'ams_bound': True}, 5),
 ]
 ADANORM_SUPPORTED_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [
     (AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'adanorm': True}, 10),
diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py
index 1671245f..ee182a6f 100644
--- a/tests/test_optimizers.py
+++ b/tests/test_optimizers.py
@@ -812,3 +812,11 @@ def test_muon_rank(rank):
     model[2].weight.grad = torch.randn(1, 1, 1)
 
     optimizer.step()
+
+
+def test_mars_c_t_norm():
+    param = simple_parameter(True)
+    param.grad[0] = 100.0
+
+    optimizer = load_optimizer('mars')([param], optimize_1d=True)
+    optimizer.step()
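
Notes for reviewers:

The `Cautious` variant added to `MARS` in #314 refers to the masking technique from [Cautious Optimizers](https://arxiv.org/abs/2411.16085): update components whose sign disagrees with the current gradient are zeroed out before the parameter step. A minimal sketch of that masking in isolation (function and tensor names here are illustrative, not this repository's actual implementation):

```python
import torch


def apply_cautious_mask(update: torch.Tensor, grad: torch.Tensor) -> torch.Tensor:
    """Zero out update components whose sign disagrees with the gradient."""
    mask = (update * grad > 0).to(grad.dtype)
    # Re-scale so the surviving components keep roughly the original average
    # magnitude; the +1 guards against division by zero for an all-zero mask.
    mask.mul_(mask.numel() / (mask.sum() + 1))
    return update * mask
```

The re-scaling step is what keeps the overall update magnitude roughly constant as components are masked away, which is why the variant can be toggled without re-tuning the learning rate.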
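The new `test_mars_c_t_norm` test forces a gradient spike (`param.grad[0] = 100.0`) through the 1D path (`optimize_1d=True`) so that the variance-reduced correction term is norm-clipped instead of blowing up the step. A rough sketch of that step, assuming the implementation follows the MARS paper's formulation (names and defaults are illustrative):

```python
import torch


def mars_correction(
    grad: torch.Tensor,
    prev_grad: torch.Tensor,
    beta1: float = 0.95,
    gamma: float = 0.025,
) -> torch.Tensor:
    # Variance-reduced gradient from the MARS paper:
    #   c_t = g_t + gamma * beta1 / (1 - beta1) * (g_t - g_{t-1})
    c_t = grad + gamma * (beta1 / (1.0 - beta1)) * (grad - prev_grad)

    # Scale c_t back to unit norm when it grows too large; a spike like
    # `param.grad[0] = 100.0` in the new test is meant to hit this branch.
    norm = torch.norm(c_t)
    if norm > 1.0:
        c_t = c_t / norm
    return c_t
```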
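Finally, a usage sketch for the configuration exercised by the new entry in `tests/constants.py`. The `mars_type` and `ams_bound` keywords come straight from the test table; the `cautious` keyword is an assumption inferred from the changelog entry, so check the `MARS` signature in the installed version before relying on it:

```python
import torch
from pytorch_optimizer import load_optimizer

model = torch.nn.Linear(4, 4)

# `ams_bound=True` matches the new test configuration; `cautious=True` is the
# assumed switch for the Cautious variant named in the changelog.
optimizer = load_optimizer('mars')(
    model.parameters(),
    lr=5e-1,
    weight_decay=1e-3,
    mars_type='adamw',
    ams_bound=True,
    cautious=True,
)

loss = model(torch.randn(2, 4)).pow(2).mean()
loss.backward()
optimizer.step()
```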