Commit: add schedulers

Hiroshiba committed May 23, 2020
1 parent 442058b commit 75a7e90
Showing 16 changed files with 1,213 additions and 0 deletions.
14 changes: 14 additions & 0 deletions Dockerfile
@@ -0,0 +1,14 @@
FROM hiroshiba/hiho-deep-docker-base:pytorch1.5.0-cuda9.0

WORKDIR /app

# install requirements
RUN pip install pytest mock typing-extensions filelock matplotlib torchvision==0.3.0

# add applications
COPY pytorch_trainer /app/pytorch_trainer
COPY tests /app/tests
COPY examples /app/examples
COPY setup.py /app/setup.py

CMD bash
7 changes: 7 additions & 0 deletions pytorch_trainer/training/extensions/__init__.py
@@ -2,11 +2,18 @@
from pytorch_trainer.training.extensions._snapshot import snapshot # NOQA
from pytorch_trainer.training.extensions._snapshot import snapshot_object # NOQA
from pytorch_trainer.training.extensions.evaluator import Evaluator # NOQA
from pytorch_trainer.training.extensions.exponential_shift import ExponentialShift # NOQA
from pytorch_trainer.training.extensions.fail_on_nonnumber import FailOnNonNumber # NOQA
from pytorch_trainer.training.extensions.inverse_shift import InverseShift # NOQA
from pytorch_trainer.training.extensions.linear_shift import LinearShift # NOQA
from pytorch_trainer.training.extensions.log_report import LogReport # NOQA
from pytorch_trainer.training.extensions.micro_average import MicroAverage # NOQA
from pytorch_trainer.training.extensions.multistep_shift import MultistepShift # NOQA
from pytorch_trainer.training.extensions.plot_report import PlotReport # NOQA
from pytorch_trainer.training.extensions.polynomial_shift import PolynomialShift # NOQA
from pytorch_trainer.training.extensions.print_report import PrintReport # NOQA
from pytorch_trainer.training.extensions.progress_bar import ProgressBar # NOQA
from pytorch_trainer.training.extensions.step_shift import StepShift # NOQA
from pytorch_trainer.training.extensions.value_observation import observe_lr # NOQA
from pytorch_trainer.training.extensions.value_observation import observe_value # NOQA
from pytorch_trainer.training.extensions.warmup_shift import WarmupShift # NOQA
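
With these re-exports in place, the new shift extensions become reachable from the package namespace. A minimal sanity check (a sketch, assuming the package is installed; nothing below is part of the commit itself):

from pytorch_trainer.training import extensions

# The schedulers added by this commit are re-exported at package level.
assert hasattr(extensions, 'ExponentialShift')
assert hasattr(extensions, 'MultistepShift')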
87 changes: 87 additions & 0 deletions pytorch_trainer/training/extensions/exponential_shift.py
@@ -0,0 +1,87 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class ExponentialShift(extension.Extension):

"""Trainer extension to exponentially shift an optimizer attribute.
This extension exponentially increases or decreases the specified attribute
of the optimizer. The typical use case is an exponential decay of the
learning rate.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
rate (float): Rate of the exponential shift. This value is multiplied
to the attribute at each call.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
target (float): Target value of the attribute. If the attribute reaches
this value, the shift stops.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, rate, init=None, target=None, optimizer=None):
self._attr = attr
if rate < 0:
raise ValueError('ExponentialShift does not support negative rate')
self._rate = rate
self._init = init
self._target = target
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
# ensure that _init is set
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]

if self._last_value is not None: # resuming from a snapshot
self._update_value(optimizer, self._last_value)
else:
self._update_value(optimizer, self._init)

def __call__(self, trainer):
self._t += 1

optimizer = self._get_optimizer(trainer)
value = self._init * (self._rate ** self._t)
if self._target is not None:
if self._rate > 1:
                # almost the same as value = min(value, self._target), but
                # this form also supports negative values
if value / self._target > 1:
value = self._target
else:
# ditto
if value / self._target < 1:
value = self._target
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer('main')

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
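
For context, a minimal usage sketch for ExponentialShift. This is hypothetical: `trainer` is assumed to be an already-built pytorch_trainer `Trainer`, and the trigger interval is illustrative, not part of the extension:

from pytorch_trainer.training import extensions

# Halve the learning rate every 1000 iterations, stopping once it
# reaches the floor of 1e-5 (the `target` clamp shown above).
trainer.extend(
    extensions.ExponentialShift('lr', rate=0.5, init=1e-3, target=1e-5),
    trigger=(1000, 'iteration'))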
92 changes: 92 additions & 0 deletions pytorch_trainer/training/extensions/inverse_shift.py
@@ -0,0 +1,92 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class InverseShift(extension.Extension):

"""Trainer extension to shift an optimizer attribute.
The new value is computed according to the fomula below:
new_attr = init_attr * (1 + gamma * iter) ^ (- power), which is compatible
to the ``inv`` learning rate policy in Caffe.
The typical use is to decrease the learning rate during the training.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
gamma (float): Parameter used to compute the new value. Refer to the
fomula above. Note that gamma is assumed to be nonegative.
power (float): Parameter used to compute the new value. Refer to the
fomula above.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
target (float): Target value of the attribute. If the attribute reaches
this value, the shift stops.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, gamma, power,
init=None, target=None, optimizer=None):
self._attr = attr
if gamma < 0:
raise ValueError('InverseShift does not support negative gamma')
self._gamma = gamma
self._power = power
self._init = init
self._target = target
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
# ensure that _init is set
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]

if self._last_value is not None: # resuming from a snapshot
self._update_value(optimizer, self._last_value)
else:
self._update_value(optimizer, self._init)

def __call__(self, trainer):
self._t += 1

optimizer = self._get_optimizer(trainer)
value = self._init * (1 + self._gamma * self._t) ** (-self._power)
if self._target is not None:
if self._power < 0:
                # almost the same as value = min(value, self._target), but
                # this form also supports negative values
if value / self._target > 1:
value = self._target
else:
# ditto
if value / self._target < 1:
value = self._target
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer('main')

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
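
A quick numerical check of the ``inv`` policy implemented above (values are illustrative only):

# new_attr = init_attr * (1 + gamma * t) ** (-power)
init, gamma, power = 0.1, 1e-4, 0.75
for t in (0, 10_000, 100_000):
    # prints roughly 0.1, 0.0595, and 0.0166: a slow hyperbolic decay
    print(t, init * (1 + gamma * t) ** -power)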
83 changes: 83 additions & 0 deletions pytorch_trainer/training/extensions/linear_shift.py
@@ -0,0 +1,83 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class LinearShift(extension.Extension):

"""Trainer extension to change an optimizer attribute linearly.
This extension changes an optimizer attribute from the first value to the
last value linearly within a specified duration. The typical use case is
warming up of the momentum coefficient.
For example, suppose that this extension is called at every iteration, and
``value_range == (x, y)`` and ``time_range == (i, j)``. Then, this
extension keeps the attribute to be ``x`` up to the ``i``-th iteration,
linearly shifts the value to ``y`` by the ``j``-th iteration, and then
keeps the value to be ``y`` after the ``j``-th iteration.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the optimizer attribute to adjust.
value_range (tuple of float): The first and the last values of the
attribute.
time_range (tuple of ints): The first and last counts of calls in which
the attribute is adjusted.
optimizer (~chainer.Optimizer): Target optimizer object. If it is None,
the main optimizer of the trainer is used.
"""

def __init__(self, attr, value_range, time_range, optimizer=None):
self._attr = attr
self._value_range = value_range
self._time_range = time_range
self._optimizer = optimizer
self._t = 0
self._last_value = None

def initialize(self, trainer):
optimizer = self._get_optimizer(trainer)
if self._last_value is not None:
value = self._last_value
else:
value = self._compute_next_value()
self._update_value(optimizer, value)

def __call__(self, trainer):
self._t += 1
optimizer = self._get_optimizer(trainer)
value = self._compute_next_value()
self._update_value(optimizer, value)

def state_dict(self):
return {"t": self._t, "last_value": self._last_value}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._last_value = state_dict["last_value"]
if isinstance(self._last_value, numpy.ndarray):
self._last_value = self._last_value.item()

def _get_optimizer(self, trainer):
return self._optimizer or trainer.updater.get_optimizer("main")

def _compute_next_value(self):
t1, t2 = self._time_range
v1, v2 = self._value_range

if self._t <= t1:
return v1
elif self._t >= t2:
return v2
rate = (self._t - t1) / (t2 - t1)
return v1 + rate * (v2 - v1)

def _update_value(self, optimizer, value):
for param_group in optimizer.param_groups:
param_group[self._attr] = value
self._last_value = value
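
A usage sketch for the momentum warm-up case mentioned in the docstring. Hypothetical: `trainer` is assumed to wrap an SGD optimizer whose param groups carry a ``'momentum'`` entry:

from pytorch_trainer.training import extensions

# Hold momentum at 0.5 for the first 100 calls, ramp linearly to 0.9
# by call 1000, then keep it there. The values are illustrative.
trainer.extend(
    extensions.LinearShift('momentum', value_range=(0.5, 0.9),
                           time_range=(100, 1000)),
    trigger=(1, 'iteration'))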
69 changes: 69 additions & 0 deletions pytorch_trainer/training/extensions/multistep_shift.py
@@ -0,0 +1,69 @@
from __future__ import division

from pytorch_trainer.training import extension


class MultistepShift(extension.Extension):

"""Trainer extension to shift an optimizer attribute in several steps.
This extension changes an optimizer attribute in several steps, every step
the attribute will multiply a factor ``gamma``.
For example, suppose that this extension is called at every iteration,
and ``init = x``, ``gamma = y``, ``step_value = [s1, s2, s3]``.
Then during the iterations from 0 to (s1 - 1), the attr will be ``x``.
During the iterations from s1 to (s2 - 1), the attr will be ``x * y``.
During the iterations from s2 to (s3 - 1), the attr will be ``x * y * y``.
During the iterations after s3, the attr will be ``x * y * y * y``.
This extension is also called before the training loop starts by default.
Args:
attr (str): Name of the attribute to shift.
init (float): Initial value of the attribute. If it is ``None``, the
extension extracts the attribute at the first call and uses it as
the initial value.
gamma (float): The factor which the attr will mutiply at the beginning
of each step.
step_value (tuple): The first iterations of each step.
optimizer (~chainer.Optimizer): Target optimizer to adjust the
attribute. If it is ``None``, the main optimizer of the updater is
used.
"""

def __init__(self, attr, gamma, step_value, init, optimizer=None):
self._attr = attr
self._gamma = gamma
self._step_value = step_value
self._init = init
self._optimizer = optimizer
self._stepvalue_size = len(step_value)
self._current_step = 0
self._t = 0

def initialize(self, trainer):
optimizer = self._optimizer or trainer.updater.get_optimizer('main')
if self._init is None:
self._init = optimizer.param_groups[0][self._attr]
else:
for param_group in optimizer.param_groups:
param_group[self._attr] = self._init

def __call__(self, trainer):
self._t += 1
optimizer = self._optimizer or trainer.updater.get_optimizer('main')
if (self._current_step < self._stepvalue_size and
self._t >= self._step_value[self._current_step]):
self._current_step += 1
value = self._init * pow(self._gamma, self._current_step)
for param_group in optimizer.param_groups:
param_group[self._attr] = value

def state_dict(self):
return {"t": self._t, "current_step": self._current_step}

def load_state_dict(self, state_dict):
self._t = state_dict["t"]
self._current_step = state_dict["current_step"]
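
Finally, a sketch of the classic step schedule using MultistepShift, mirroring the docstring example (hypothetical values; `trainer` is assumed as above):

from pytorch_trainer.training import extensions

# lr stays at 0.1 until iteration 30000, drops to 0.01 until 60000,
# and to 0.001 afterwards.
trainer.extend(
    extensions.MultistepShift('lr', gamma=0.1,
                              step_value=(30000, 60000), init=0.1),
    trigger=(1, 'iteration'))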