This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Update torch requirement from <1.6.0,>=1.5.0 to >=1.5.0,<1.7.0 (#4519)
* Update torch requirement from <1.6.0,>=1.5.0 to >=1.5.0,<1.7.0

Updates the requirements on [torch](https://github.com/pytorch/pytorch) to permit the latest version.
- [Release notes](https://github.com/pytorch/pytorch/releases)
- [Commits](pytorch/pytorch@v1.5.0...v1.6.0)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>

* fix tensor division

* replace Apex with torch's native amp

* add another note to CHANGELOG

* use amp during validation as well

* change test name

Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: epwalsh <epwalsh10@gmail.com>
Co-authored-by: ai2-bulldozer[bot] <47044978+ai2-bulldozer[bot]@users.noreply.github.com>
3 people authored Jul 31, 2020
1 parent 146bd9e commit 9415350
Showing 9 changed files with 91 additions and 147 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

+### Changed
+
+- Upgraded PyTorch requirement to 1.6.
+- Replaced the NVIDIA Apex AMP module with torch's native AMP module. The default trainer (`GradientDescentTrainer`)
+  now takes a `use_amp: bool` parameter instead of the old `opt_level: str` parameter.
+
### Fixed

- Removed unnecessary warning about deadlocks in `DataLoader`.
@@ -19,6 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added the option to specify `requires_grad: false` within an optimizers parameter groups.
- Added the `file-friendly-logging` flag back to the `train` command. Also added this flag to the `predict`, `evaluate`, and `find-learning-rate` commands.

+### Removed
+
+- Removed the `opt_level` parameter to `Model.load` and `load_archive`. In order to use AMP with a loaded
+  model now, just run the model's forward pass within torch's [`autocast`](https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast)
+  context.
+
## [v1.1.0rc1](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc1) - 2020-07-14

### Fixed
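The `use_amp` flag mentioned above replaces Apex's `opt_level` in trainer configuration. A minimal, hypothetical config fragment, written here as a Python dict (every key other than `use_amp` is an illustrative placeholder):

```python
# A minimal, hypothetical trainer config fragment showing the new flag.
# All keys other than "use_amp" are illustrative placeholders.
trainer_config = {
    "type": "gradient_descent",  # the default GradientDescentTrainer
    "cuda_device": 0,            # AMP only applies when training on a GPU
    "num_epochs": 5,
    "use_amp": True,             # replaces the old Apex "opt_level" setting
}
```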
8 changes: 3 additions & 5 deletions Makefile
@@ -88,13 +88,11 @@ install :
# Due to a weird thing with pip, we may need egg-info before running `pip install -e`.
# See https://github.com/pypa/pip/issues/4537.
python setup.py install_egg_info
-# Install allennlp as editable and all dependencies except apex since that requires torch to already be installed.
-grep -Ev 'NVIDIA/apex\.git' dev-requirements.txt | pip install --upgrade --upgrade-strategy eager -e . -r /dev/stdin
-# The above command will probably install the typing backport because of pydoc-markdown,
+# Install allennlp as editable and all dependencies.
+pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
+# The above command might install the typing backport because of pydoc-markdown,
# so we have to uninstall it again.
pip uninstall -y typing
-# Now install apex.
-grep -E 'NVIDIA/apex\.git' dev-requirements.txt | pip install --upgrade -r /dev/stdin

#
# Documention helpers.
13 changes: 1 addition & 12 deletions allennlp/models/archival.py
@@ -130,11 +130,7 @@ def archive_model(


def load_archive(
-    archive_file: str,
-    cuda_device: int = -1,
-    opt_level: str = None,
-    overrides: str = "",
-    weights_file: str = None,
+    archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None,
) -> Archive:
"""
Instantiates an Archive from an archived `tar.gz` file.
@@ -146,12 +142,6 @@ def load_archive(
cuda_device : `int`, optional (default = `-1`)
If `cuda_device` is >= 0, the model will be loaded onto the
corresponding GPU. Otherwise it will be loaded onto the CPU.
-    opt_level : `str`, optional, (default = `None`)
-        Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
-        precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
-        See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
-        more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
-        Amp is not used and this argument is ignored.
overrides : `str`, optional (default = `""`)
JSON overrides to apply to the unarchived `Params` object.
weights_file : `str`, optional (default = `None`)
@@ -196,7 +186,6 @@ def load_archive(
weights_file=weights_path,
serialization_dir=serialization_dir,
cuda_device=cuda_device,
-        opt_level=opt_level,
)

return Archive(model=model, config=config)
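With `opt_level` removed from `load_archive`, mixed-precision inference goes through torch's native `autocast` instead, as the CHANGELOG entry above notes. A minimal sketch, assuming a placeholder archive path and an `instance` built elsewhere:

```python
import torch
from allennlp.models.archival import load_archive

# "model.tar.gz" is a placeholder; point this at a real archive.
archive = load_archive("model.tar.gz", cuda_device=0)
model = archive.model

# Run the forward pass inside torch's native AMP autocast context.
# `instance` is assumed to be an allennlp.data.Instance built elsewhere.
with torch.cuda.amp.autocast():
    outputs = model.forward_on_instance(instance)
```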
41 changes: 1 addition & 40 deletions allennlp/models/model.py
@@ -8,10 +8,6 @@
from os import PathLike
from typing import Dict, List, Set, Type, Optional, Union

-try:
-    from apex import amp
-except ImportError:
-    amp = None
import numpy
import torch

@@ -272,7 +268,6 @@ def _load(
serialization_dir: Union[str, PathLike],
weights_file: Optional[Union[str, PathLike]] = None,
cuda_device: int = -1,
-        opt_level: Optional[str] = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
@@ -292,9 +287,6 @@

model_params = config.get("model")

-        training_params = config.get("trainer", Params({}))
-        opt_level = opt_level or training_params.get("opt_level")
-
# The experiment config tells us how to _train_ a model, including where to get pre-trained
# embeddings from. We're now _loading_ the model, so those embeddings will already be
# stored in our weights. We don't need any pretrained weight file anymore, and we don't
@@ -309,30 +301,6 @@
else:
model.cpu()

-        # If opt_level is not None (i.e. it exists in the loaded models params or was provided
-        # as argument to this method), call amp.initialize on the loaded model.
-        # Log a warning if amp is not installed or we are loading onto the cpu so that these
-        # cases do not pass silently.
-        if opt_level is not None:
-            if amp is None:
-                logger.warning(
-                    (
-                        f"Apex must be installed to enable mixed-precision via amp."
-                        f" Got opt_level is not None (opt_level={opt_level}) but Apex is not installed."
-                        " Any further training or inference will happen at full-precision."
-                    )
-                )
-            if cuda_device == -1:
-                logger.warning(
-                    (
-                        f"A CUDA device must be specified to enable mixed-precision via amp."
-                        f" Got cuda_device=={cuda_device} but opt_level is not None (opt_level={opt_level})."
-                        " Any further training or inference will happen at full-precision."
-                    )
-                )
-            if amp is not None and cuda_device >= 0:
-                model = amp.initialize(model, opt_level=opt_level)
-
# If vocab+embedding extension was done, the model initialized from from_params
# and one defined by state dict in weights_file might not have same embedding shapes.
# Eg. when model embedder module was transferred along with vocab extension, the
@@ -353,7 +321,6 @@ def load(
serialization_dir: Union[str, PathLike],
weights_file: Optional[Union[str, PathLike]] = None,
cuda_device: int = -1,
-        opt_level: Optional[str] = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
@@ -374,12 +341,6 @@ def load(
cuda_device: `int = -1`
By default we load the model on the CPU, but if you want to load it
for GPU usage you can specify the id of your GPU here
-        opt_level : `str`, optional (default = `None`)
-            Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
-            precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
-            See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
-            more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
-            Amp is not used and this argument is ignored.
# Returns
@@ -403,7 +364,7 @@ def load(
# If we really need to change this, we would need to implement a recursive
# get_model_class method, that recurses whenever it finds a from_archive model type.
model_class = Model
-        return model_class._load(config, serialization_dir, weights_file, cuda_device, opt_level)
+        return model_class._load(config, serialization_dir, weights_file, cuda_device)

def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:
"""
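After this change, `Model.load` is called without an `opt_level` argument. A sketch, assuming a finished training run in a hypothetical `serialization_dir/`:

```python
from allennlp.common.params import Params
from allennlp.models.model import Model

# "serialization_dir" is a placeholder for a real training output directory.
config = Params.from_file("serialization_dir/config.json")
model = Model.load(
    config,
    serialization_dir="serialization_dir",
    cuda_device=-1,  # load on CPU; note there is no opt_level argument anymore
)
```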
@@ -301,7 +301,7 @@ def lengths_to_mask(lengths, max_len, device):

# The number of segment each sequence spans, excluding padding. Mimicking ceiling operation.
# Shape: (batch_size,)
-        num_effective_segments = (seq_lengths + self._max_length - 1) / self._max_length
+        num_effective_segments = (seq_lengths + self._max_length - 1) // self._max_length
# The number of indices that end tokens should shift back.
num_removed_non_end_tokens = (
num_effective_segments * self._num_added_tokens - self._num_added_end_tokens
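The `/` to `//` change here matters because PyTorch has been moving `/` on integer tensors toward true (float) division, with the old implicit floor behavior deprecated; the explicit `//` keeps the result an integer tensor and preserves the ceiling trick described in the comment. A small self-contained check of that identity:

```python
import torch

seq_lengths = torch.tensor([5, 9, 12])
max_length = 4

# ceil(a / b) == (a + b - 1) // b for positive integers,
# so each length maps to the number of segments it spans.
num_effective_segments = (seq_lengths + max_length - 1) // max_length
print(num_effective_segments)  # tensor([2, 3, 3])
```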