From a3d691c3c2badce2a8e5d540538af2c7fed7811d Mon Sep 17 00:00:00 2001
From: Tudor Gulin
Date: Mon, 9 Dec 2024 15:58:35 +0200
Subject: [PATCH] Fix deprecated statement (#3307)

* fix-deprecated-warning

Replaced torch.cuda.amp.autocast with torch.amp.autocast("cuda",...).

* autopep8 fix
* Update torch version to 1.12.0
* Address PR comments
* Revert unwanted changes
* Fix regex
* Revert change in CycleGAN_with_torch_cuda_amp
* Fix regex in test_create_supervised
* Update ignite/engine/__init__.py
* Update tests/ignite/engine/test_create_supervised.py

---------

Co-authored-by: Gulin7
Co-authored-by: vfdev
---
 examples/cifar10/main.py                      |  7 +++--
 .../benchmark_torch_cuda_amp.py               |  5 ++--
 examples/cifar10_qat/main.py                  |  5 ++--
 .../CycleGAN_with_torch_cuda_amp.ipynb        |  5 ++--
 .../classification/imagenet/main.py           |  9 +++---
 .../segmentation/pascal_voc2012/main.py       |  9 +++---
 examples/transformers/main.py                 |  7 +++--
 ignite/engine/__init__.py                     | 12 ++++----
 tests/ignite/engine/test_create_supervised.py | 30 +++++++++----------
 9 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py
index b64b81c1d036..b8dbce5d9601 100644
--- a/examples/cifar10/main.py
+++ b/examples/cifar10/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 import ignite
 import ignite.distributed as idist
@@ -299,7 +300,7 @@ def train_step(engine, batch):
         model.train()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
@@ -355,7 +356,7 @@ def evaluate_step(engine: Engine, batch):
         x = x.to(device, non_blocking=True)
         y = y.to(device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(x)
         return output, y
diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
index 992f305bf24a..746d7eb54c49 100644
--- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
+++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
@@ -1,6 +1,7 @@
 import fire
 import torch
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 from torch.nn import CrossEntropyLoss
 from torch.optim import SGD
 from torchvision.models import wide_resnet50_2
@@ -34,7 +35,7 @@ def train_step(engine, batch):
         optimizer.zero_grad()
         # Runs the forward pass with autocasting.
-        with autocast():
+        with autocast("cuda"):
             y_pred = model(x)
             loss = criterion(y_pred, y)
diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py
index f965ce1e6e4d..7b8366a2a63f 100644
--- a/examples/cifar10_qat/main.py
+++ b/examples/cifar10_qat/main.py
@@ -6,7 +6,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 import ignite
 import ignite.distributed as idist
@@ -283,7 +284,7 @@ def train_step(engine, batch):
         model.train()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
index 614c8528b8d6..f6271eaf3bda 100644
--- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
+++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
@@ -887,7 +887,7 @@
    "id": "JE8dLeEfIl_Z"
   },
   "source": [
-    "We will use [`torch.cuda.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
+    "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
   ]
  },
 {
@@ -896,7 +896,8 @@
    "id": "vrJls4p-FRcA"
   },
   "source": [
-    "from torch.cuda.amp import autocast, GradScaler\n",
+    "from torch.cuda.amp import GradScaler\n",
+    "from torch.amp import autocast\n",
     "\n",
     "from ignite.utils import convert_tensor\n",
     "import torch.nn.functional as F\n",
diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py
index 85c20c08a62b..defb4ddc1510 100644
--- a/examples/references/classification/imagenet/main.py
+++ b/examples/references/classification/imagenet/main.py
@@ -6,9 +6,10 @@
 import torch
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 import dataflow as data
 import utils
@@ -144,7 +145,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def training_step(engine, batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -235,7 +236,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py
index 20afebbb7d36..b6fbc7ad494a 100644
--- a/examples/references/segmentation/pascal_voc2012/main.py
+++ b/examples/references/segmentation/pascal_voc2012/main.py
@@ -6,9 +6,10 @@
 import torch
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 import dataflow as data
 import utils
@@ -191,7 +192,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def forward_pass(batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -272,7 +273,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
diff --git a/examples/transformers/main.py b/examples/transformers/main.py
index cd1a84d2195b..f8118eabf90e 100644
--- a/examples/transformers/main.py
+++ b/examples/transformers/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 import ignite
 import ignite.distributed as idist
@@ -309,7 +310,7 @@ def train_step(engine, batch):
         model.train()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(input_batch)
             loss = criterion(y_pred, labels)
@@ -373,7 +374,7 @@ def evaluate_step(engine, batch):
         input_batch = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in batch[0].items()}
         labels = labels.to(device, non_blocking=True, dtype=torch.float)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(input_batch)
         return output, labels
diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index cbaac4e16cb7..6e82bc2f6bc7 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -185,9 +185,9 @@ def supervised_training_step_amp(
     """
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
     if gradient_accumulation_steps <= 0:
         raise ValueError(
@@ -200,7 +200,7 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
             optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        with autocast(enabled=True):
+        with autocast("cuda", enabled=True):
             output = model_fn(model, x)
             y_pred = model_transform(output)
             loss = loss_fn(y_pred, y)
@@ -726,15 +726,15 @@ def supervised_evaluation_step_amp(
         Added `model_fn` to customize model's application on the sample
     """
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
     def evaluate_step(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[torch.Tensor]]:
         model.eval()
         with torch.no_grad():
             x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-            with autocast(enabled=True):
+            with autocast("cuda", enabled=True):
                 output = model_fn(model, x)
                 y_pred = model_transform(output)
         return output_transform(x, y, y_pred)
diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py
index 54938167601a..4f07c95929e0 100644
--- a/tests/ignite/engine/test_create_supervised.py
+++ b/tests/ignite/engine/test_create_supervised.py
@@ -168,7 +168,7 @@ def _():
     trainer.run(data)
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 def test_create_supervised_training_scalar_assignment():
     with mock.patch("ignite.engine._check_arg") as check_arg_mock:
         check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False)
@@ -447,21 +447,21 @@ def test_create_supervised_trainer_apex_error():
 def mock_torch_cuda_amp_module():
     with patch.dict(
         "sys.modules",
-        {"torch.cuda.amp": None, "torch.cuda.amp.grad_scaler": None, "torch.cuda.amp.autocast_mode": None},
+        {"torch.amp": None, "torch.cuda.amp": None, "torch.amp.autocast_mode": None},
     ):
         yield torch
 def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module):
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
         _test_create_supervised_trainer_wrong_accumulation(trainer_device="cpu", amp_mode="amp")
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
         _test_create_supervised_trainer(amp_mode="amp")
     with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use scaler argument."):
         _test_create_supervised_trainer(amp_mode="amp", scaler=True)
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.5.0"), reason="Skip if < 1.5.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 def test_create_supervised_trainer_scaler_not_amp():
     scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
@@ -501,7 +501,7 @@ def test_create_supervised_trainer_on_mps():
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_trainer_on_cuda_amp():
     model_device = trainer_device = "cuda"
@@ -517,7 +517,7 @@ def test_create_supervised_trainer_on_cuda_amp():
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device, amp_mode="amp")
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_trainer_on_cuda_amp_scaler():
     model_device = trainer_device = "cuda"
@@ -630,8 +630,8 @@ def test_create_supervised_evaluator():
     _test_mocked_supervised_evaluator()
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step_amp(mock_torch_cuda_amp_module)
@@ -640,8 +640,8 @@ def test_create_supervised_evaluator_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="cpu")
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step(mock_torch_cuda_amp_module, evaluator_device="cpu")
             _test_create_evaluation_step_amp(mock_torch_cuda_amp_module, evaluator_device="cpu")
@@ -651,8 +651,8 @@ def test_create_supervised_evaluator_traced_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="cpu", trace=True)
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step(mock_torch_cuda_amp_module, evaluator_device="cpu", trace=True)
@@ -682,7 +682,7 @@ def test_create_supervised_evaluator_on_mps_with_model_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="mps")
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_evaluator_on_cuda_amp():
     model_device = evaluator_device = "cuda"
@@ -691,7 +691,7 @@ def test_create_supervised_evaluator_amp_error(mock_torch_cuda_amp_module):
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
        _test_create_supervised_evaluator(amp_mode="amp")
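
Note (reviewer addition, not part of the patch): a minimal sketch of the migration pattern the diff applies across the examples and the engine code. Names such as model, criterion, with_amp, and the toy tensors are placeholders introduced here for illustration; the snippet assumes a CUDA-capable machine and torch>=1.12.0.

# Illustrative sketch of the torch.cuda.amp -> torch.amp autocast migration (assumed setup, not from the patch).
import torch
from torch.amp import autocast          # new-style import: the device type is passed explicitly
from torch.cuda.amp import GradScaler   # GradScaler import is left unchanged by this patch

model = torch.nn.Linear(10, 2).cuda()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = GradScaler(enabled=True)
with_amp = True  # placeholder for the examples' config flag

x = torch.randn(4, 10, device="cuda")
y = torch.randint(0, 2, (4,), device="cuda")

optimizer.zero_grad()
# Before: with autocast(enabled=with_amp):           (torch.cuda.amp, now deprecated)
# After:  with autocast("cuda", enabled=with_amp):   (torch.amp)
with autocast("cuda", enabled=with_amp):
    y_pred = model(x)
    loss = criterion(y_pred, y)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()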