Merge pull request #314 from kozistr/feature/mars-optimizer

[Feature] Implement MARS optimizer

kozistr authored Dec 21, 2024
2 parents 42b1d76 + e915efd commit d16a368
Showing 4 changed files with 13 additions and 3 deletions.
5 changes: 3 additions & 2 deletions docs/changelogs/v3.3.1.md
@@ -8,7 +8,8 @@
* [SGD-like Memory, AdamW-level Performance](https://arxiv.org/abs/2412.05270)
* Rename the `Apollo` (`An Adaptive Parameter-wise Diagonal Quasi-Newton Method for Nonconvex Stochastic Optimization`) optimizer name to `ApolloDQN` not to overlap with the new optimizer name `APOLLO`. (#312)
* Implement `MARS` optimizer. (#313, #314)
-  * [Unleashing the Power of Variance Reduction for Training Large Models](https://arxiv.org/abs/2411.10438)
+  * [Unleashing the Power of Variance Reduction for Training Large Models](https://arxiv.org/abs/2411.10438)
+* Support `Cautious` variant to `MARS` optimizer. (#314)

### Bug

@@ -17,7 +18,7 @@

### Docs

-* Add more visualizations. (#310)
+* Add more visualizations. (#310, #314)

### Contributions

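The changelog above adds a `Cautious` variant to `MARS`. As a rough, hypothetical sketch (not the library's actual implementation), the cautious technique of the Cautious Optimizers paper (arXiv:2411.16085) zeroes update components whose sign disagrees with the current gradient, then rescales the surviving components by the inverse mean of the mask; the function name here is illustrative:

```python
def cautious_update(update, grad):
    """Sketch of a 'cautious' mask: keep only update components that point
    in the same direction as the gradient, rescaled so the mask averages 1."""
    mask = [1.0 if u * g > 0 else 0.0 for u, g in zip(update, grad)]
    mean = sum(mask) / len(mask)
    if mean > 0:
        mask = [m / mean for m in mask]  # rescale to preserve update magnitude on average
    return [u * m for u, m in zip(update, mask)]

# One component agrees with the gradient, one disagrees; the disagreeing
# component is zeroed and the agreeing one is scaled up by 1/mean(mask) = 2.
out = cautious_update([1.0, -2.0], [0.5, 3.0])
print(out)
```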
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pytorch_optimizer"
-version = "3.3.0"
+version = "3.3.1"
description = "optimizer & lr scheduler & objective function collections in PyTorch"
license = "Apache-2.0"
authors = ["kozistr <kozistr@gmail.com>"]
1 change: 1 addition & 0 deletions tests/constants.py
@@ -537,6 +537,7 @@
(MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'adamw'}, 5),
(MARS, {'lr': 1e-1, 'weight_decay': 1e-3, 'mars_type': 'lion', 'optimize_1d': True}, 5),
(MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'shampoo'}, 5),
+    (MARS, {'lr': 5e-1, 'lr_1d': 5e-1, 'weight_decay': 1e-3, 'mars_type': 'adamw', 'ams_bound': True}, 5),
]
ADANORM_SUPPORTED_OPTIMIZERS: List[Tuple[Any, Dict[str, Union[float, bool, int]], int]] = [
(AdaBelief, {'lr': 5e-1, 'weight_decay': 1e-3, 'adanorm': True}, 10),
8 changes: 8 additions & 0 deletions tests/test_optimizers.py
@@ -812,3 +812,11 @@ def test_muon_rank(rank):
    model[2].weight.grad = torch.randn(1, 1, 1)

    optimizer.step()
+
+
+def test_mars_c_t_norm():
+    param = simple_parameter(True)
+    param.grad[0] = 100.0
+
+    optimizer = load_optimizer('mars')([param], optimize_1d=True)
+    optimizer.step()
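The new `test_mars_c_t_norm` drives one gradient component to 100.0 to exercise MARS's norm clipping of its variance-reduced gradient. A minimal pure-Python sketch of that correction, following the MARS paper (arXiv:2411.10438), with assumed defaults `gamma=0.025` and `beta1=0.95` (the function name is illustrative, not the library's API):

```python
import math

def mars_correction(grad, grad_prev, gamma=0.025, beta1=0.95):
    """Sketch of the MARS variance-reduced gradient:
    c_t = g_t + gamma * beta1 / (1 - beta1) * (g_t - g_{t-1}),
    clipped to unit L2 norm when ||c_t|| > 1."""
    scale = gamma * beta1 / (1.0 - beta1)
    c = [g + scale * (g - gp) for g, gp in zip(grad, grad_prev)]
    norm = math.sqrt(sum(x * x for x in c))
    if norm > 1.0:  # the clipping the test targets
        c = [x / norm for x in c]
    return c

# A huge gradient component (like the 100.0 in the test) gets clipped to unit norm:
c = mars_correction([100.0, 0.0], [0.0, 0.0])
print(math.sqrt(sum(x * x for x in c)))  # ~1.0
```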
