diff --git a/optimization.py b/optimization.py
index 1038bb5b8..b40348b38 100644
--- a/optimization.py
+++ b/optimization.py
@@ -137,7 +137,7 @@ def apply_gradients(self, grads_and_vars, global_step=None, name=None):
       # the correct way of using L2 regularization/weight decay with Adam,
       # since that will interact with the m and v parameters in strange ways.
       #
-      # Instead we want ot decay the weights in a manner that doesn't interact
+      # Instead we want to decay the weights in a manner that doesn't interact
       # with the m/v parameters. This is equivalent to adding the square
       # of the weights to the loss with plain (non-momentum) SGD.
       if self._do_use_weight_decay(param_name):
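
For context, the comment touched by this diff describes decoupled weight decay (the AdamW-style scheme): the decay term is applied to the parameters directly after the Adam step is formed, rather than added to the loss, so it never flows through the m/v moment estimates. Below is a minimal NumPy sketch of one such update step; the function and argument names (adamw_style_update, weight_decay_rate, lr) are illustrative, not taken from this file, and the sketch omits details such as learning-rate scheduling.

import numpy as np

def adamw_style_update(param, grad, m, v, lr=1e-3, beta1=0.9, beta2=0.999,
                       eps=1e-6, weight_decay_rate=0.01):
    # Standard Adam moment estimates, driven only by the raw gradient.
    m = beta1 * m + (1.0 - beta1) * grad
    v = beta2 * v + (1.0 - beta2) * (grad * grad)
    update = m / (np.sqrt(v) + eps)

    # Decoupled weight decay: shrink the weights directly instead of adding
    # an L2 term to the loss, so the decay never interacts with m or v.
    # With plain (non-momentum) SGD this would be equivalent to adding the
    # squared weights to the loss, as the comment above notes.
    update += weight_decay_rate * param

    new_param = param - lr * update
    return new_param, m, v

# Example: one update step on a toy parameter vector.
p = np.array([0.5, -0.3])
g = np.array([0.1, 0.2])
m0 = np.zeros_like(p)
v0 = np.zeros_like(p)
p, m0, v0 = adamw_style_update(p, g, m0, v0)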