Showing 16 changed files with 1,213 additions and 0 deletions.
@@ -0,0 +1,14 @@
FROM hiroshiba/hiho-deep-docker-base:pytorch1.5.0-cuda9.0

WORKDIR /app

# install requirements
RUN pip install pytest mock typing-extensions filelock matplotlib torchvision==0.3.0

# add applications
COPY pytorch_trainer /app/pytorch_trainer
COPY tests /app/tests
COPY examples /app/examples
COPY setup.py /app/setup.py

CMD bash
@@ -0,0 +1,87 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class ExponentialShift(extension.Extension):

    """Trainer extension to exponentially shift an optimizer attribute.

    This extension exponentially increases or decreases the specified
    attribute of the optimizer. The typical use case is an exponential decay
    of the learning rate.

    This extension is also called before the training loop starts by default.

    Args:
        attr (str): Name of the attribute to shift.
        rate (float): Rate of the exponential shift. The attribute is
            multiplied by this value at each call.
        init (float): Initial value of the attribute. If it is ``None``, the
            extension extracts the attribute at the first call and uses it as
            the initial value.
        target (float): Target value of the attribute. If the attribute
            reaches this value, the shift stops.
        optimizer (~chainer.Optimizer): Target optimizer to adjust the
            attribute. If it is ``None``, the main optimizer of the updater is
            used.

    """

    def __init__(self, attr, rate, init=None, target=None, optimizer=None):
        self._attr = attr
        if rate < 0:
            raise ValueError('ExponentialShift does not support negative rate')
        self._rate = rate
        self._init = init
        self._target = target
        self._optimizer = optimizer
        self._t = 0
        self._last_value = None

    def initialize(self, trainer):
        optimizer = self._get_optimizer(trainer)
        # ensure that _init is set
        if self._init is None:
            self._init = optimizer.param_groups[0][self._attr]

        if self._last_value is not None:  # resuming from a snapshot
            self._update_value(optimizer, self._last_value)
        else:
            self._update_value(optimizer, self._init)

    def __call__(self, trainer):
        self._t += 1

        optimizer = self._get_optimizer(trainer)
        value = self._init * (self._rate ** self._t)
        if self._target is not None:
            if self._rate > 1:
                # almost the same as value = min(value, self._target), but
                # this form supports negative values, too
                if value / self._target > 1:
                    value = self._target
            else:
                # ditto
                if value / self._target < 1:
                    value = self._target
        self._update_value(optimizer, value)

    def state_dict(self):
        return {"t": self._t, "last_value": self._last_value}

    def load_state_dict(self, state_dict):
        self._t = state_dict["t"]
        self._last_value = state_dict["last_value"]
        if isinstance(self._last_value, numpy.ndarray):
            self._last_value = self._last_value.item()

    def _get_optimizer(self, trainer):
        return self._optimizer or trainer.updater.get_optimizer('main')

    def _update_value(self, optimizer, value):
        for param_group in optimizer.param_groups:
            param_group[self._attr] = value
        self._last_value = value
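A minimal usage sketch, not part of this commit: the module path below is an assumption, and the optimizer is passed in explicitly so the demo runs without a trainer/updater. In normal use the trainer invokes the extension on its trigger.

import torch

from pytorch_trainer.training.extensions.exponential_shift import ExponentialShift  # assumed path

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Halve the learning rate on every call, but never drop below the 1e-3 target.
shift = ExponentialShift('lr', rate=0.5, target=1e-3, optimizer=optimizer)
shift.initialize(trainer=None)   # picks up the initial lr (0.1) and applies it
for _ in range(10):
    shift(trainer=None)          # in real training this runs on the trigger
print(optimizer.param_groups[0]['lr'])  # 0.1 * 0.5**10 (~9.8e-5) is clipped to the 1e-3 target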
@@ -0,0 +1,92 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class InverseShift(extension.Extension):

    """Trainer extension to shift an optimizer attribute.

    The new value is computed according to the formula below:
    new_attr = init_attr * (1 + gamma * iter) ^ (-power), which is compatible
    with the ``inv`` learning rate policy in Caffe.

    The typical use is to decrease the learning rate during training.

    This extension is also called before the training loop starts by default.

    Args:
        attr (str): Name of the attribute to shift.
        gamma (float): Parameter used to compute the new value. Refer to the
            formula above. Note that gamma is assumed to be nonnegative.
        power (float): Parameter used to compute the new value. Refer to the
            formula above.
        init (float): Initial value of the attribute. If it is ``None``, the
            extension extracts the attribute at the first call and uses it as
            the initial value.
        target (float): Target value of the attribute. If the attribute
            reaches this value, the shift stops.
        optimizer (~chainer.Optimizer): Target optimizer to adjust the
            attribute. If it is ``None``, the main optimizer of the updater is
            used.

    """

    def __init__(self, attr, gamma, power,
                 init=None, target=None, optimizer=None):
        self._attr = attr
        if gamma < 0:
            raise ValueError('InverseShift does not support negative gamma')
        self._gamma = gamma
        self._power = power
        self._init = init
        self._target = target
        self._optimizer = optimizer
        self._t = 0
        self._last_value = None

    def initialize(self, trainer):
        optimizer = self._get_optimizer(trainer)
        # ensure that _init is set
        if self._init is None:
            self._init = optimizer.param_groups[0][self._attr]

        if self._last_value is not None:  # resuming from a snapshot
            self._update_value(optimizer, self._last_value)
        else:
            self._update_value(optimizer, self._init)

    def __call__(self, trainer):
        self._t += 1

        optimizer = self._get_optimizer(trainer)
        value = self._init * (1 + self._gamma * self._t) ** (-self._power)
        if self._target is not None:
            if self._power < 0:
                # almost the same as value = min(value, self._target), but
                # this form supports negative values, too
                if value / self._target > 1:
                    value = self._target
            else:
                # ditto
                if value / self._target < 1:
                    value = self._target
        self._update_value(optimizer, value)

    def state_dict(self):
        return {"t": self._t, "last_value": self._last_value}

    def load_state_dict(self, state_dict):
        self._t = state_dict["t"]
        self._last_value = state_dict["last_value"]
        if isinstance(self._last_value, numpy.ndarray):
            self._last_value = self._last_value.item()

    def _get_optimizer(self, trainer):
        return self._optimizer or trainer.updater.get_optimizer('main')

    def _update_value(self, optimizer, value):
        for param_group in optimizer.param_groups:
            param_group[self._attr] = value
        self._last_value = value
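A similar sketch for this schedule, again not taken from this commit (assumed import path; the optimizer is passed explicitly so no trainer is required for the demo):

import torch

from pytorch_trainer.training.extensions.inverse_shift import InverseShift  # assumed path

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Caffe-style "inv" decay: lr = 0.1 * (1 + 0.05 * t) ** -0.75
shift = InverseShift('lr', gamma=0.05, power=0.75, optimizer=optimizer)
shift.initialize(trainer=None)   # leaves lr at its initial 0.1
for _ in range(100):
    shift(trainer=None)
print(optimizer.param_groups[0]['lr'])  # 0.1 * 6 ** -0.75, roughly 0.026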
@@ -0,0 +1,83 @@
from __future__ import division

import numpy

from pytorch_trainer.training import extension


class LinearShift(extension.Extension):

    """Trainer extension to change an optimizer attribute linearly.

    This extension changes an optimizer attribute from the first value to the
    last value linearly within a specified duration. The typical use case is
    warming up the momentum coefficient.

    For example, suppose that this extension is called at every iteration,
    and ``value_range == (x, y)`` and ``time_range == (i, j)``. Then, this
    extension keeps the attribute at ``x`` up to the ``i``-th iteration,
    linearly shifts the value to ``y`` by the ``j``-th iteration, and then
    keeps the value at ``y`` after the ``j``-th iteration.

    This extension is also called before the training loop starts by default.

    Args:
        attr (str): Name of the optimizer attribute to adjust.
        value_range (tuple of float): The first and the last values of the
            attribute.
        time_range (tuple of ints): The first and last counts of calls in
            which the attribute is adjusted.
        optimizer (~chainer.Optimizer): Target optimizer object. If it is
            ``None``, the main optimizer of the trainer is used.

    """

    def __init__(self, attr, value_range, time_range, optimizer=None):
        self._attr = attr
        self._value_range = value_range
        self._time_range = time_range
        self._optimizer = optimizer
        self._t = 0
        self._last_value = None

    def initialize(self, trainer):
        optimizer = self._get_optimizer(trainer)
        if self._last_value is not None:  # resuming from a snapshot
            value = self._last_value
        else:
            value = self._compute_next_value()
        self._update_value(optimizer, value)

    def __call__(self, trainer):
        self._t += 1
        optimizer = self._get_optimizer(trainer)
        value = self._compute_next_value()
        self._update_value(optimizer, value)

    def state_dict(self):
        return {"t": self._t, "last_value": self._last_value}

    def load_state_dict(self, state_dict):
        self._t = state_dict["t"]
        self._last_value = state_dict["last_value"]
        if isinstance(self._last_value, numpy.ndarray):
            self._last_value = self._last_value.item()

    def _get_optimizer(self, trainer):
        return self._optimizer or trainer.updater.get_optimizer("main")

    def _compute_next_value(self):
        t1, t2 = self._time_range
        v1, v2 = self._value_range

        if self._t <= t1:
            return v1
        elif self._t >= t2:
            return v2
        rate = (self._t - t1) / (t2 - t1)
        return v1 + rate * (v2 - v1)

    def _update_value(self, optimizer, value):
        for param_group in optimizer.param_groups:
            param_group[self._attr] = value
        self._last_value = value
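An illustrative sketch of the linear warm-up described in the docstring, not part of this commit (assumed import path; the optimizer is passed explicitly so no trainer is required):

import torch

from pytorch_trainer.training.extensions.linear_shift import LinearShift  # assumed path

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)

# Hold momentum at 0.5 for the first 100 calls, ramp it linearly to 0.9 by
# call 1000, and keep it there afterwards.
shift = LinearShift('momentum', value_range=(0.5, 0.9), time_range=(100, 1000),
                    optimizer=optimizer)
shift.initialize(trainer=None)   # applies the first value, 0.5
for _ in range(550):
    shift(trainer=None)
print(optimizer.param_groups[0]['momentum'])  # halfway through the ramp: 0.7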
@@ -0,0 +1,69 @@
from __future__ import division

from pytorch_trainer.training import extension


class MultistepShift(extension.Extension):

    """Trainer extension to shift an optimizer attribute in several steps.

    This extension changes an optimizer attribute in several steps: at the
    beginning of each step the attribute is multiplied by a factor ``gamma``.

    For example, suppose that this extension is called at every iteration,
    and ``init = x``, ``gamma = y``, ``step_value = [s1, s2, s3]``.
    Then during the iterations from 0 to (s1 - 1), the attribute will be
    ``x``. During the iterations from s1 to (s2 - 1), it will be ``x * y``.
    During the iterations from s2 to (s3 - 1), it will be ``x * y * y``.
    During the iterations after s3, it will be ``x * y * y * y``.

    This extension is also called before the training loop starts by default.

    Args:
        attr (str): Name of the attribute to shift.
        init (float): Initial value of the attribute. If it is ``None``, the
            extension extracts the attribute at the first call and uses it as
            the initial value.
        gamma (float): The factor by which the attribute is multiplied at the
            beginning of each step.
        step_value (tuple): The first iterations of each step.
        optimizer (~chainer.Optimizer): Target optimizer to adjust the
            attribute. If it is ``None``, the main optimizer of the updater is
            used.

    """

    def __init__(self, attr, gamma, step_value, init, optimizer=None):
        self._attr = attr
        self._gamma = gamma
        self._step_value = step_value
        self._init = init
        self._optimizer = optimizer
        self._stepvalue_size = len(step_value)
        self._current_step = 0
        self._t = 0

    def initialize(self, trainer):
        optimizer = self._optimizer or trainer.updater.get_optimizer('main')
        if self._init is None:
            self._init = optimizer.param_groups[0][self._attr]
        else:
            for param_group in optimizer.param_groups:
                param_group[self._attr] = self._init

    def __call__(self, trainer):
        self._t += 1
        optimizer = self._optimizer or trainer.updater.get_optimizer('main')
        if (self._current_step < self._stepvalue_size and
                self._t >= self._step_value[self._current_step]):
            self._current_step += 1
        value = self._init * pow(self._gamma, self._current_step)
        for param_group in optimizer.param_groups:
            param_group[self._attr] = value

    def state_dict(self):
        return {"t": self._t, "current_step": self._current_step}

    def load_state_dict(self, state_dict):
        self._t = state_dict["t"]
        self._current_step = state_dict["current_step"]
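A short sketch of the stepwise schedule, again illustrative only (assumed import path; the optimizer is passed explicitly so no trainer is required for the demo):

import torch

from pytorch_trainer.training.extensions.multistep_shift import MultistepShift  # assumed path

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Multiply the learning rate by 0.1 when the call count reaches 30, and again at 60.
shift = MultistepShift('lr', gamma=0.1, step_value=(30, 60), init=0.1,
                       optimizer=optimizer)
shift.initialize(trainer=None)   # applies init (0.1) to every param group
for _ in range(100):
    shift(trainer=None)
print(optimizer.param_groups[0]['lr'])  # 0.1 * 0.1**2 = 1e-3 after both steps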