This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Update torch requirement from <1.6.0,>=1.5.0 to >=1.5.0,<1.7.0 (#4519)
* Update torch requirement from <1.6.0,>=1.5.0 to >=1.5.0,<1.7.0

Updates the requirements on [torch](https://github.com/pytorch/pytorch) to permit the latest version.
- [Release notes](https://github.com/pytorch/pytorch/releases)
- [Commits](pytorch/pytorch@v1.5.0...v1.6.0)

Signed-off-by: dependabot-preview[bot] <support@dependabot.com>

* fix tensor division

* replace Apex with torch's native amp

* add another note to CHANGELOG

* use amp during validation as well

* change test name

Co-authored-by: dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com>
Co-authored-by: epwalsh <epwalsh10@gmail.com>
Co-authored-by: ai2-bulldozer[bot] <47044978+ai2-bulldozer[bot]@users.noreply.github.com>
3 people authored Jul 31, 2020
1 parent 146bd9e commit 9415350
Showing 9 changed files with 91 additions and 147 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

+### Changed
+
+- Upgraded PyTorch requirement to 1.6.
+- Replaced the NVIDIA Apex AMP module with torch's native AMP module. The default trainer (`GradientDescentTrainer`)
+  now takes a `use_amp: bool` parameter instead of the old `opt_level: str` parameter.
+
### Fixed

- Removed unnecessary warning about deadlocks in `DataLoader`.
@@ -19,6 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added the option to specify `requires_grad: false` within an optimizers parameter groups.
- Added the `file-friendly-logging` flag back to the `train` command. Also added this flag to the `predict`, `evaluate`, and `find-learning-rate` commands.

+### Removed
+
+- Removed the `opt_level` parameter to `Model.load` and `load_archive`. In order to use AMP with a loaded
+  model now, just run the model's forward pass within torch's [`autocast`](https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast)
+  context.
+
## [v1.1.0rc1](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc1) - 2020-07-14

### Fixed
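The `use_amp` flag mentioned above replaces Apex's `opt_level` in trainer configuration. A minimal, hypothetical config fragment, written here as a Python dict (every key other than `use_amp` is an illustrative placeholder):

```python
# A minimal, hypothetical trainer config fragment showing the new flag.
# All keys other than "use_amp" are illustrative placeholders.
trainer_config = {
    "type": "gradient_descent",  # the default GradientDescentTrainer
    "cuda_device": 0,            # AMP only applies when training on a GPU
    "num_epochs": 5,
    "use_amp": True,             # replaces the old Apex "opt_level" setting
}
```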
8 changes: 3 additions & 5 deletions Makefile
@@ -88,13 +88,11 @@ install :
# Due to a weird thing with pip, we may need egg-info before running `pip install -e`.
# See https://github.com/pypa/pip/issues/4537.
python setup.py install_egg_info
-# Install allennlp as editable and all dependencies except apex since that requires torch to already be installed.
-grep -Ev 'NVIDIA/apex\.git' dev-requirements.txt | pip install --upgrade --upgrade-strategy eager -e . -r /dev/stdin
-# The above command will probably install the typing backport because of pydoc-markdown,
+# Install allennlp as editable and all dependencies.
+pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
+# The above command might install the typing backport because of pydoc-markdown,
# so we have to uninstall it again.
pip uninstall -y typing
-# Now install apex.
-grep -E 'NVIDIA/apex\.git' dev-requirements.txt | pip install --upgrade -r /dev/stdin

#
# Documention helpers.
13 changes: 1 addition & 12 deletions allennlp/models/archival.py
@@ -130,11 +130,7 @@ def archive_model(


def load_archive(
-    archive_file: str,
-    cuda_device: int = -1,
-    opt_level: str = None,
-    overrides: str = "",
-    weights_file: str = None,
+    archive_file: str, cuda_device: int = -1, overrides: str = "", weights_file: str = None,
) -> Archive:
"""
Instantiates an Archive from an archived `tar.gz` file.
@@ -146,12 +142,6 @@ def load_archive(
cuda_device : `int`, optional (default = `-1`)
If `cuda_device` is >= 0, the model will be loaded onto the
corresponding GPU. Otherwise it will be loaded onto the CPU.
-    opt_level : `str`, optional, (default = `None`)
-        Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
-        precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
-        See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
-        more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
-        Amp is not used and this argument is ignored.
overrides : `str`, optional (default = `""`)
JSON overrides to apply to the unarchived `Params` object.
weights_file : `str`, optional (default = `None`)
@@ -196,7 +186,6 @@ def load_archive(
weights_file=weights_path,
serialization_dir=serialization_dir,
cuda_device=cuda_device,
-        opt_level=opt_level,
)

return Archive(model=model, config=config)
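With `opt_level` removed from `load_archive`, mixed-precision inference goes through torch's native `autocast` instead, as the CHANGELOG entry above notes. A minimal sketch, assuming a placeholder archive path and an `instance` built elsewhere:

```python
import torch
from allennlp.models.archival import load_archive

# "model.tar.gz" is a placeholder; point this at a real archive.
archive = load_archive("model.tar.gz", cuda_device=0)
model = archive.model

# Run the forward pass inside torch's native AMP autocast context.
# `instance` is assumed to be an allennlp.data.Instance built elsewhere.
with torch.cuda.amp.autocast():
    outputs = model.forward_on_instance(instance)
```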
41 changes: 1 addition & 40 deletions allennlp/models/model.py
@@ -8,10 +8,6 @@
from os import PathLike
from typing import Dict, List, Set, Type, Optional, Union

-try:
-    from apex import amp
-except ImportError:
-    amp = None
import numpy
import torch

@@ -272,7 +268,6 @@ def _load(
serialization_dir: Union[str, PathLike],
weights_file: Optional[Union[str, PathLike]] = None,
cuda_device: int = -1,
-        opt_level: Optional[str] = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
@@ -292,9 +287,6 @@

model_params = config.get("model")

-        training_params = config.get("trainer", Params({}))
-        opt_level = opt_level or training_params.get("opt_level")
-
# The experiment config tells us how to _train_ a model, including where to get pre-trained
# embeddings from. We're now _loading_ the model, so those embeddings will already be
# stored in our weights. We don't need any pretrained weight file anymore, and we don't
@@ -309,30 +301,6 @@
else:
model.cpu()

-        # If opt_level is not None (i.e. it exists in the loaded models params or was provided
-        # as argument to this method), call amp.initialize on the loaded model.
-        # Log a warning if amp is not installed or we are loading onto the cpu so that these
-        # cases do not pass silently.
-        if opt_level is not None:
-            if amp is None:
-                logger.warning(
-                    (
-                        f"Apex must be installed to enable mixed-precision via amp."
-                        f" Got opt_level is not None (opt_level={opt_level}) but Apex is not installed."
-                        " Any further training or inference will happen at full-precision."
-                    )
-                )
-            if cuda_device == -1:
-                logger.warning(
-                    (
-                        f"A CUDA device must be specified to enable mixed-precision via amp."
-                        f" Got cuda_device=={cuda_device} but opt_level is not None (opt_level={opt_level})."
-                        " Any further training or inference will happen at full-precision."
-                    )
-                )
-            if amp is not None and cuda_device >= 0:
-                model = amp.initialize(model, opt_level=opt_level)
-
# If vocab+embedding extension was done, the model initialized from from_params
# and one defined by state dict in weights_file might not have same embedding shapes.
# Eg. when model embedder module was transferred along with vocab extension, the
@@ -353,7 +321,6 @@ def load(
serialization_dir: Union[str, PathLike],
weights_file: Optional[Union[str, PathLike]] = None,
cuda_device: int = -1,
-        opt_level: Optional[str] = None,
) -> "Model":
"""
Instantiates an already-trained model, based on the experiment
@@ -374,12 +341,6 @@ def load(
cuda_device: `int = -1`
By default we load the model on the CPU, but if you want to load it
for GPU usage you can specify the id of your GPU here
-        opt_level : `str`, optional (default = `None`)
-            Each `opt_level` establishes a set of properties that govern Amp’s implementation of pure or mixed
-            precision training. Must be a choice of `"O0"`, `"O1"`, `"O2"`, or `"O3"`.
-            See the Apex [documentation](https://nvidia.github.io/apex/amp.html#opt-levels-and-properties) for
-            more details. If `None`, defaults to the `opt_level` found in the model params. If `cuda_device==-1`,
-            Amp is not used and this argument is ignored.
# Returns
@@ -403,7 +364,7 @@ def load(
# If we really need to change this, we would need to implement a recursive
# get_model_class method, that recurses whenever it finds a from_archive model type.
model_class = Model
-        return model_class._load(config, serialization_dir, weights_file, cuda_device, opt_level)
+        return model_class._load(config, serialization_dir, weights_file, cuda_device)

def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:
"""
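After this change, `Model.load` is called without an `opt_level` argument. A sketch, assuming a finished training run in a hypothetical `serialization_dir/`:

```python
from allennlp.common.params import Params
from allennlp.models.model import Model

# "serialization_dir" is a placeholder for a real training output directory.
config = Params.from_file("serialization_dir/config.json")
model = Model.load(
    config,
    serialization_dir="serialization_dir",
    cuda_device=-1,  # load on CPU; note there is no opt_level argument anymore
)
```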
@@ -301,7 +301,7 @@ def lengths_to_mask(lengths, max_len, device):

# The number of segment each sequence spans, excluding padding. Mimicking ceiling operation.
# Shape: (batch_size,)
-        num_effective_segments = (seq_lengths + self._max_length - 1) / self._max_length
+        num_effective_segments = (seq_lengths + self._max_length - 1) // self._max_length
# The number of indices that end tokens should shift back.
num_removed_non_end_tokens = (
num_effective_segments * self._num_added_tokens - self._num_added_end_tokens
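The `/` to `//` change here matters because PyTorch has been moving `/` on integer tensors toward true (float) division, with the old implicit floor behavior deprecated; the explicit `//` keeps the result an integer tensor and preserves the ceiling trick described in the comment. A small self-contained check of that identity:

```python
import torch

seq_lengths = torch.tensor([5, 9, 12])
max_length = 4

# ceil(a / b) == (a + b - 1) // b for positive integers,
# so each length maps to the number of segments it spans.
num_effective_segments = (seq_lengths + max_length - 1) // max_length
print(num_effective_segments)  # tensor([2, 3, 3])
```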