diff --git a/optimization.py b/optimization.py
index 1038bb5b8..b40348b38 100644
--- a/optimization.py
+++ b/optimization.py
@@ -137,7 +137,7 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None):
       # the correct way of using L2 regularization/weight decay with Adam,
       # since that will interact with the m and v parameters in strange ways.
       #
-      # Instead we want ot decay the weights in a manner that doesn't interact
+      # Instead we want to decay the weights in a manner that doesn't interact
       # with the m/v parameters. This is equivalent to adding the square
       # of the weights to the loss with plain (non-momentum) SGD.
       if self._do_use_weight_decay(param_name):
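
For context, the comment touched by this diff describes decoupled weight decay (the AdamW-style scheme): the decay term is applied to the parameters directly after the Adam step is formed, rather than added to the loss, so it never flows through the m/v moment estimates. Below is a minimal NumPy sketch of one such update step; the function and argument names (adamw_style_update, weight_decay_rate, lr) are illustrative, not taken from this file, and the sketch omits details such as learning-rate scheduling.

import numpy as np

def adamw_style_update(param, grad, m, v, lr=1e-3, beta1=0.9, beta2=0.999,
                       eps=1e-6, weight_decay_rate=0.01):
    # Standard Adam moment estimates, driven only by the raw gradient.
    m = beta1 * m + (1.0 - beta1) * grad
    v = beta2 * v + (1.0 - beta2) * (grad * grad)
    update = m / (np.sqrt(v) + eps)

    # Decoupled weight decay: shrink the weights directly instead of adding
    # an L2 term to the loss, so the decay never interacts with m or v.
    # With plain (non-momentum) SGD this would be equivalent to adding the
    # squared weights to the loss, as the comment above notes.
    update += weight_decay_rate * param

    new_param = param - lr * update
    return new_param, m, v

# Example: one update step on a toy parameter vector.
p = np.array([0.5, -0.3])
g = np.array([0.1, 0.2])
m0 = np.zeros_like(p)
v0 = np.zeros_like(p)
p, m0, v0 = adamw_style_update(p, g, m0, v0)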