Commit: add schedulers

Hiroshiba committed May 23, 2020
1 parent 442058b commit 75a7e90
Showing 16 changed files with 1,213 additions and 0 deletions.
14 changes: 14 additions & 0 deletions Dockerfile
@@ -0,0 +1,14 @@
FROM hiroshiba/hiho-deep-docker-base:pytorch1.5.0-cuda9.0

WORKDIR /app

# install requirements
RUN pip install pytest mock typing-extensions filelock matplotlib torchvision==0.3.0

# add applications
COPY pytorch_trainer /app/pytorch_trainer
COPY tests /app/tests
COPY examples /app/examples
COPY setup.py /app/setup.py

CMD bash
7 changes: 7 additions & 0 deletions pytorch_trainer/training/extensions/__init__.py
@@ -2,11 +2,18 @@
from pytorch_trainer.training.extensions._snapshot import snapshot # NOQA
from pytorch_trainer.training.extensions._snapshot import snapshot_object # NOQA
from pytorch_trainer.training.extensions.evaluator import Evaluator # NOQA
from pytorch_trainer.training.extensions.exponential_shift import ExponentialShift # NOQA
from pytorch_trainer.training.extensions.fail_on_nonnumber import FailOnNonNumber # NOQA
from pytorch_trainer.training.extensions.inverse_shift import InverseShift # NOQA
from pytorch_trainer.training.extensions.linear_shift import LinearShift # NOQA
from pytorch_trainer.training.extensions.log_report import LogReport # NOQA
from pytorch_trainer.training.extensions.micro_average import MicroAverage # NOQA
from pytorch_trainer.training.extensions.multistep_shift import MultistepShift # NOQA
from pytorch_trainer.training.extensions.plot_report import PlotReport # NOQA
from pytorch_trainer.training.extensions.polynomial_shift import PolynomialShift # NOQA
from pytorch_trainer.training.extensions.print_report import PrintReport # NOQA
from pytorch_trainer.training.extensions.progress_bar import ProgressBar # NOQA
from pytorch_trainer.training.extensions.step_shift import StepShift # NOQA
from pytorch_trainer.training.extensions.value_observation import observe_lr # NOQA
from pytorch_trainer.training.extensions.value_observation import observe_value # NOQA
from pytorch_trainer.training.extensions.warmup_shift import WarmupShift # NOQA
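
With these re-exports in place, the new shift extensions become reachable from the package namespace. A minimal sanity check (a sketch, assuming the package is installed; nothing below is part of the commit itself):

from pytorch_trainer.training import extensions

# The schedulers added by this commit are re-exported at package level.
assert hasattr(extensions, 'ExponentialShift')
assert hasattr(extensions, 'MultistepShift')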
87 changes: 87 additions & 0 deletions pytorch_trainer/training/extensions/exponential_shift.py
@@ -0,0 +1,87 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class ExponentialShift(extension.Extension):

"""Trainer extension to exponentially shift an optimizer attribute.
This extension exponentially increases or decreases the specified attribute
of the optimizer. The typical use case is an exponential decay of the
learning rate.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
rate (float): Rate of the exponential shift. This value is multiplied
to the attribute at each call.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
target (float): Target value of the attribute. If the attribute reaches
this value, the shift stops.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, rate, init=None, target=None, optimizer=None):
self._attr = attr
if rate < 0:
raise ValueError('ExponentialShift does not support negative rate')
self._rate = rate
self._init = init
self._target = target
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
# ensure that _init is set
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]

if self._last_value is not None: # resuming from a snapshot
self._update_value(optimizer, self._last_value)
else:
self._update_value(optimizer, self._init)

def __call__(self, trainer):
self._t += 1

optimizer = self._get_optimizer(trainer)
value = self._init * (self._rate ** self._t)
if self._target is not None:
if self._rate > 1:
                # almost the same as value = min(value, self._target), but
                # this form also supports negative values
if value / self._target > 1:
value = self._target
else:
# ditto
if value / self._target < 1:
value = self._target
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer('main')

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
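
For context, a minimal usage sketch for ExponentialShift. This is hypothetical: `trainer` is assumed to be an already-built pytorch_trainer `Trainer`, and the trigger interval is illustrative, not part of the extension:

from pytorch_trainer.training import extensions

# Halve the learning rate every 1000 iterations, stopping once it
# reaches the floor of 1e-5 (the `target` clamp shown above).
trainer.extend(
    extensions.ExponentialShift('lr', rate=0.5, init=1e-3, target=1e-5),
    trigger=(1000, 'iteration'))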
92 changes: 92 additions & 0 deletions pytorch_trainer/training/extensions/inverse_shift.py
@@ -0,0 +1,92 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class InverseShift(extension.Extension):

"""Trainer extension to shift an optimizer attribute.
The new value is computed according to the fomula below:
new_attr = init_attr * (1 + gamma * iter) ^ (- power), which is compatible
to the ``inv`` learning rate policy in Caffe.
The typical use is to decrease the learning rate during the training.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
gamma (float): Parameter used to compute the new value. Refer to the
fomula above. Note that gamma is assumed to be nonegative.
power (float): Parameter used to compute the new value. Refer to the
fomula above.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
target (float): Target value of the attribute. If the attribute reaches
this value, the shift stops.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, gamma, power,
init=None, target=None, optimizer=None):
self._attr = attr
if gamma < 0:
raise ValueError('InverseShift does not support negative gamma')
self._gamma = gamma
self._power = power
self._init = init
self._target = target
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
# ensure that _init is set
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]

if self._last_value is not None: # resuming from a snapshot
self._update_value(optimizer, self._last_value)
else:
self._update_value(optimizer, self._init)

def __call__(self, trainer):
self._t += 1

optimizer = self._get_optimizer(trainer)
value = self._init * (1 + self._gamma * self._t) ** (-self._power)
if self._target is not None:
if self._power < 0:
                # almost the same as value = min(value, self._target), but
                # this form also supports negative values
if value / self._target > 1:
value = self._target
else:
# ditto
if value / self._target < 1:
value = self._target
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer('main')

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
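
A quick numerical check of the ``inv`` policy implemented above (values are illustrative only):

# new_attr = init_attr * (1 + gamma * t) ** (-power)
init, gamma, power = 0.1, 1e-4, 0.75
for t in (0, 10_000, 100_000):
    # prints roughly 0.1, 0.0595, and 0.0166: a slow hyperbolic decay
    print(t, init * (1 + gamma * t) ** -power)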
83 changes: 83 additions & 0 deletions pytorch_trainer/training/extensions/linear_shift.py
@@ -0,0 +1,83 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class LinearShift(extension.Extension):

"""Trainer extension to change an optimizer attribute linearly.
This extension changes an optimizer attribute from the first value to the
last value linearly within a specified duration. The typical use case is
warming up of the momentum coefficient.
For example, suppose that this extension is called at every iteration, and
``value_range == (x, y)`` and ``time_range == (i, j)``. Then, this
extension keeps the attribute to be ``x`` up to the ``i``-th iteration,
linearly shifts the value to ``y`` by the ``j``-th iteration, and then
keeps the value to be ``y`` after the ``j``-th iteration.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the optimizer attribute to adjust.
value_range (tuple of float): The first and the last values of the
attribute.
time_range (tuple of ints): The first and last counts of calls in which
the attribute is adjusted.
optimizer (~chainer.Optimizer): Target optimizer object. If it is None,
the main optimizer of the trainer is used.
"""

def __init__(self, attr, value_range, time_range, optimizer=None):
self._attr = attr
self._value_range = value_range
self._time_range = time_range
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
if self._last_value is not None:
value = self._last_value
else:
value = self._compute_next_value()
self._update_value(optimizer, value)

def __call__(self, trainer):
self._t += 1
optimizer = self._get_optimizer(trainer)
value = self._compute_next_value()
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer("main")

def _compute_next_value(self):
t1, t2 = self._time_range
v1, v2 = self._value_range

if self._t <= t1:
return v1
elif self._t >= t2:
return v2
rate = (self._t - t1) / (t2 - t1)
return v1 + rate * (v2 - v1)

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
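
A usage sketch for the momentum warm-up case mentioned in the docstring. Hypothetical: `trainer` is assumed to wrap an SGD optimizer whose param groups carry a ``'momentum'`` entry:

from pytorch_trainer.training import extensions

# Hold momentum at 0.5 for the first 100 calls, ramp linearly to 0.9
# by call 1000, then keep it there. The values are illustrative.
trainer.extend(
    extensions.LinearShift('momentum', value_range=(0.5, 0.9),
                           time_range=(100, 1000)),
    trigger=(1, 'iteration'))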
69 changes: 69 additions & 0 deletions pytorch_trainer/training/extensions/multistep_shift.py
@@ -0,0 +1,69 @@
from __future__ import division

from pytorch_trainer.training import extension


class MultistepShift(extension.Extension):

"""Trainer extension to shift an optimizer attribute in several steps.
This extension changes an optimizer attribute in several steps, every step
the attribute will multiply a factor ``gamma``.
For example, suppose that this extension is called at every iteration,
and ``init = x``, ``gamma = y``, ``step_value = [s1, s2, s3]``.
Then during the iterations from 0 to (s1 - 1), the attr will be ``x``.
During the iterations from s1 to (s2 - 1), the attr will be ``x * y``.
During the iterations from s2 to (s3 - 1), the attr will be ``x * y * y``.
During the iterations after s3, the attr will be ``x * y * y * y``.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
gamma (float): The factor which the attr will mutiply at the beginning
of each step.
step_value (tuple): The first iterations of each step.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, gamma, step_value, init, optimizer=None):
self._attr = attr
self._gamma = gamma
self._step_value = step_value
self._init = init
self._optimizer = optimizer
self._stepvalue_size = len(step_value)
self._current_step = 0
self._t = 0

def initialize(self, trainer):
optimizer = self._optimizer or trainer.updater.get_optimizer('main')
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]
else:
for param_group in optimizer.param_groups:
param_group[self._attr] = self._init

def __call__(self, trainer):
self._t += 1
optimizer = self._optimizer or trainer.updater.get_optimizer('main')
if (self._current_step < self._stepvalue_size and
self._t >= self._step_value[self._current_step]):
self._current_step += 1
value = self._init * pow(self._gamma, self._current_step)
for param_group in optimizer.param_groups:
param_group[self._attr] = value

def state_dict(self):
return {"t": self._t, "current_step": self._current_step}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._current_step = state_dict["current_step"]
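
Finally, a sketch of the classic step schedule using MultistepShift, mirroring the docstring example (hypothetical values; `trainer` is assumed as above):

from pytorch_trainer.training import extensions

# lr stays at 0.1 until iteration 30000, drops to 0.01 until 60000,
# and to 0.001 afterwards.
trainer.extend(
    extensions.MultistepShift('lr', gamma=0.1,
                              step_value=(30000, 60000), init=0.1),
    trigger=(1, 'iteration'))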