From 93cff75536a3cc3b12f07a5638268f16774dc603 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Sun, 24 Nov 2024 22:38:38 +0100
Subject: [PATCH 01/10] try updating gpu tests GHA (#3306)

---
 .github/workflows/gpu-tests.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
index 81862e1f67bd..0a72711fbddc 100644
--- a/.github/workflows/gpu-tests.yml
+++ b/.github/workflows/gpu-tests.yml
@@ -16,7 +16,7 @@ concurrency:
   group: gpu-tests-${{ github.ref_name }}-${{ !(github.ref_protected) || github.sha }}
   cancel-in-progress: true
 
-# Cherry-picked from https://github.com/pytorch/test-infra/blob/main/.github/workflows/linux_job.yml
+# Cherry-picked from https://github.com/pytorch/test-infra/blob/main/.github/workflows/linux_job_v2.yml
 
 jobs:
   gpu-tests:
@@ -25,7 +25,7 @@ jobs:
         pytorch-channel: [pytorch, pytorch-nightly]
       fail-fast: false
     env:
-      DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1"
+      DOCKER_IMAGE: "pytorch/almalinux-builder:cuda12.4"
       REPOSITORY: ${{ github.repository }}
       PR_NUMBER: ${{ github.event.pull_request.number }}
     runs-on: linux.8xlarge.nvidia.gpu
@@ -40,7 +40,7 @@ jobs:
           echo "::endgroup::"
 
       - name: Checkout repository (pytorch/test-infra)
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           # Support the use case where we need to checkout someone's fork
           repository: pytorch/test-infra
@@ -55,7 +55,7 @@ jobs:
           docker-image: ${{ env.DOCKER_IMAGE }}
 
       - name: Checkout repository (${{ github.repository }})
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           # Support the use case where we need to checkout someone's fork
           repository: ${{ github.repository }}
@@ -102,9 +102,9 @@ jobs:
 
           # Install PyTorch
           if [ "${{ matrix.pytorch-channel }}" == "pytorch" ]; then
-            pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu121
+            pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu124
           else
-            pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
+            pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
           fi
 
           python -c "import torch; print(torch.__version__, ', CUDA is available: ', torch.cuda.is_available()); exit(not torch.cuda.is_available())"
@@ -139,7 +139,7 @@ jobs:
       - name: Run examples in container
         continue-on-error: false
         run: |
-          SCRIPT=$(cat << EOF
+          script=$(cat << EOF
 
           set -xe
 

From 36ff817506f0172516b66addfeb097892a04e933 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Sun, 24 Nov 2024 22:40:41 +0100
Subject: [PATCH 02/10] Update pyproject.toml (#3305)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a6aae5458ad8..016c0d9eb13b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.black]
 line-length = 120
-target-version = ['py38', 'py39']
+target-version = ['py39', 'py311']
 include = '\.pyi?$'
 exclude = '''
 

From 9e2763e097e08a42caa9b7e03a88b6fff38621a5 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Tue, 3 Dec 2024 11:38:46 +0100
Subject: [PATCH 03/10] Update requirements-dev.txt (#3310)

---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index d475e556cdff..91b560e56530 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -21,7 +21,7 @@ mlflow
 neptune-client>=0.16.17
 tensorboard
 torchvision
-pynvml
+pynvml<12  # pynvml module was removed in 12.X, is not developed or maintained. We should replace pynvml with something else.
 clearml
 scikit-image
 py-rouge

From 1c3b9e975073bd4be47533fe98adf537b2ea67b4 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Tue, 3 Dec 2024 13:07:30 +0100
Subject: [PATCH 04/10] Fixed GPU tests exec scripts and failing metrics
 (#3301)

* Fixed GPU tests and failing metrics

* Updated timeout param

* Updated infra cuda12.1 -> cuda12.4

* Add tmate for debug

* Disable sudo

* Attempt to debug tmate!

* Attempt to use bash in step

* Update gpu-tests.yml

* Skip failing test and remove tmate debugging

* Fixed formatting

---------

Co-authored-by: Sadra Barikbin <sadraqazvin1@yahoo.com>
---
 .github/workflows/gpu-tests.yml                 |  2 +-
 .../clustering/calinski_harabasz_score.py       |  4 ++--
 .../metrics/clustering/davies_bouldin_score.py  |  4 ++--
 ignite/metrics/clustering/silhouette_score.py   |  4 ++--
 .../metrics/regression/kendall_correlation.py   |  4 ++--
 .../metrics/regression/spearman_correlation.py  |  4 ++--
 tests/common_test_functionality.sh              |  5 ++---
 .../metrics/test_classification_report.py       | 17 +++++++++++++++++
 tests/ignite/metrics/test_hsic.py               |  4 ++--
 tests/run_cpu_tests.sh                          |  5 ++---
 tests/run_gpu_tests.sh                          | 10 +++++-----
 11 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
index 0a72711fbddc..13c628ad302c 100644
--- a/.github/workflows/gpu-tests.yml
+++ b/.github/workflows/gpu-tests.yml
@@ -124,7 +124,7 @@ jobs:
         uses: nick-fields/retry@v2.9.0
         with:
           max_attempts: 5
-          timeout_minutes: 25
+          timeout_minutes: 45
           shell: bash
           command: docker exec -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
           new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
diff --git a/ignite/metrics/clustering/calinski_harabasz_score.py b/ignite/metrics/clustering/calinski_harabasz_score.py
index fe58ac461517..79f8dc99ba50 100644
--- a/ignite/metrics/clustering/calinski_harabasz_score.py
+++ b/ignite/metrics/clustering/calinski_harabasz_score.py
@@ -11,8 +11,8 @@
 def _calinski_harabasz_score(features: Tensor, labels: Tensor) -> float:
     from sklearn.metrics import calinski_harabasz_score
 
-    np_features = features.numpy()
-    np_labels = labels.numpy()
+    np_features = features.cpu().numpy()
+    np_labels = labels.cpu().numpy()
     score = calinski_harabasz_score(np_features, np_labels)
     return score
 
diff --git a/ignite/metrics/clustering/davies_bouldin_score.py b/ignite/metrics/clustering/davies_bouldin_score.py
index b34ec69f51ad..afea0518951b 100644
--- a/ignite/metrics/clustering/davies_bouldin_score.py
+++ b/ignite/metrics/clustering/davies_bouldin_score.py
@@ -11,8 +11,8 @@
 def _davies_bouldin_score(features: Tensor, labels: Tensor) -> float:
     from sklearn.metrics import davies_bouldin_score
 
-    np_features = features.numpy()
-    np_labels = labels.numpy()
+    np_features = features.cpu().numpy()
+    np_labels = labels.cpu().numpy()
     score = davies_bouldin_score(np_features, np_labels)
     return score
 
diff --git a/ignite/metrics/clustering/silhouette_score.py b/ignite/metrics/clustering/silhouette_score.py
index 39b28c5d0409..48a59d583ec4 100644
--- a/ignite/metrics/clustering/silhouette_score.py
+++ b/ignite/metrics/clustering/silhouette_score.py
@@ -111,7 +111,7 @@ def __init__(
     def _silhouette_score(self, features: Tensor, labels: Tensor) -> float:
         from sklearn.metrics import silhouette_score
 
-        np_features = features.numpy()
-        np_labels = labels.numpy()
+        np_features = features.cpu().numpy()
+        np_labels = labels.cpu().numpy()
         score = silhouette_score(np_features, np_labels, **self._silhouette_kwargs)
         return score
diff --git a/ignite/metrics/regression/kendall_correlation.py b/ignite/metrics/regression/kendall_correlation.py
index 7ad87b224024..34d876a36599 100644
--- a/ignite/metrics/regression/kendall_correlation.py
+++ b/ignite/metrics/regression/kendall_correlation.py
@@ -16,8 +16,8 @@ def _get_kendall_tau(variant: str = "b") -> Callable[[Tensor, Tensor], float]:
         raise ValueError(f"variant accepts 'b' or 'c', got {variant!r}.")
 
     def _tau(predictions: Tensor, targets: Tensor) -> float:
-        np_preds = predictions.flatten().numpy()
-        np_targets = targets.flatten().numpy()
+        np_preds = predictions.flatten().cpu().numpy()
+        np_targets = targets.flatten().cpu().numpy()
         r = kendalltau(np_preds, np_targets, variant=variant).statistic
         return r
 
diff --git a/ignite/metrics/regression/spearman_correlation.py b/ignite/metrics/regression/spearman_correlation.py
index 7f126d6e56be..cbd89f67c9d0 100644
--- a/ignite/metrics/regression/spearman_correlation.py
+++ b/ignite/metrics/regression/spearman_correlation.py
@@ -12,8 +12,8 @@
 def _spearman_r(predictions: Tensor, targets: Tensor) -> float:
     from scipy.stats import spearmanr
 
-    np_preds = predictions.flatten().numpy()
-    np_targets = targets.flatten().numpy()
+    np_preds = predictions.flatten().cpu().numpy()
+    np_targets = targets.flatten().cpu().numpy()
     r = spearmanr(np_preds, np_targets).statistic
     return r
 
diff --git a/tests/common_test_functionality.sh b/tests/common_test_functionality.sh
index 6e60947f927b..91003eddc092 100644
--- a/tests/common_test_functionality.sh
+++ b/tests/common_test_functionality.sh
@@ -85,7 +85,6 @@ run_tests() {
         skip_distrib_opt=""
     fi
 
-
     echo [pytest] > pytest.ini ; echo "cache_dir=${cache_dir}" >> pytest.ini
 
     # Assemble options for the pytest command
@@ -103,8 +102,8 @@ run_tests() {
 
     # Run the command
     if [ "$trap_deselected_exit_code" -eq "1" ]; then
-        CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}" || { exit_code=$?; if [ "$exit_code" -eq ${last_failed_no_failures_code} ]; then echo "All tests deselected"; else exit $exit_code; fi; }
+        eval "pytest ${pytest_args}" || { exit_code=$?; if [ "$exit_code" -eq ${last_failed_no_failures_code} ]; then echo "All tests deselected"; else exit $exit_code; fi; }
     else
-        CUDA_VISIBLE_DEVICES="" eval "pytest ${pytest_args}"
+        eval "pytest ${pytest_args}"
     fi
 }
diff --git a/tests/ignite/metrics/test_classification_report.py b/tests/ignite/metrics/test_classification_report.py
index 87e328c8051e..cae8b5145f55 100644
--- a/tests/ignite/metrics/test_classification_report.py
+++ b/tests/ignite/metrics/test_classification_report.py
@@ -164,6 +164,23 @@ def update(engine, i):
 @pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
 @pytest.mark.skipif(Version(torch.__version__) < Version("1.7.0"), reason="Skip if < 1.7.0")
 def test_distrib_nccl_gpu(distributed_context_single_node_nccl):
+
+    pytest.skip("Temporarily skip failing test. See https://github.com/pytorch/ignite/pull/3301")
+    # When run with 2 devices:
+    #  tests/ignite/metrics/test_classification_report.py::test_distrib_nccl_gpu Fatal Python error: Aborted
+    # Thread 0x00007fac95c95700 (most recent call first):
+    #   <no Python frame>
+
+    # Thread 0x00007facbb89b700 (most recent call first):
+    #   <no Python frame>
+
+    # Thread 0x00007fae637f4700 (most recent call first):
+    #   File "<string>", line 534 in read
+    #   File "<string>", line 567 in from_io
+    #   File "<string>", line 1160 in _thread_receiver
+    #   File "<string>", line 341 in run
+    #   File "<string>", line 411 in _perform_spawn
+
     device = idist.device()
     _test_integration_multiclass(device, True)
     _test_integration_multiclass(device, False)
diff --git a/tests/ignite/metrics/test_hsic.py b/tests/ignite/metrics/test_hsic.py
index 57af5fa2862c..28fe5c1f97db 100644
--- a/tests/ignite/metrics/test_hsic.py
+++ b/tests/ignite/metrics/test_hsic.py
@@ -139,10 +139,10 @@ def test_integration(self, sigma_x: float, sigma_y: float):
             metric_devices.append(device)
 
         for metric_device in metric_devices:
-            x = torch.randn((n_iters * batch_size, n_dims_x)).float().to(device)
+            x = torch.randn((n_iters * batch_size, n_dims_x), device=device).float()
 
             lin = nn.Linear(n_dims_x, n_dims_y).to(device)
-            y = torch.sin(lin(x) * 100) + torch.randn(n_iters * batch_size, n_dims_y) * 1e-4
+            y = torch.sin(lin(x) * 100) + torch.randn(n_iters * batch_size, n_dims_y, device=x.device) * 1e-4
 
             def data_loader(i, input_x, input_y):
                 return input_x[i * batch_size : (i + 1) * batch_size], input_y[i * batch_size : (i + 1) * batch_size]
diff --git a/tests/run_cpu_tests.sh b/tests/run_cpu_tests.sh
index 8d387f5542e7..f52988a68183 100644
--- a/tests/run_cpu_tests.sh
+++ b/tests/run_cpu_tests.sh
@@ -6,8 +6,7 @@ skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0}
 use_last_failed=${USE_LAST_FAILED:-0}
 match_tests_expression=${1:-""}
 
-
-run_tests \
+CUDA_VISIBLE_DEVICES="" run_tests \
     --core_args "--tx 4*popen//python=python -vvv tests/ignite" \
     --cache_dir ".cpu-not-distrib" \
     --skip_distrib_tests "${skip_distrib_tests}" \
@@ -21,7 +20,7 @@ if [ "${skip_distrib_tests}" -eq "1" ]; then
 fi
 
 # Run 2 processes with --dist=each
-run_tests \
+CUDA_VISIBLE_DEVICES="" run_tests \
     --core_args "-m distributed -vvv tests/ignite" \
     --world_size 2 \
     --cache_dir ".cpu-distrib" \
diff --git a/tests/run_gpu_tests.sh b/tests/run_gpu_tests.sh
index 26497f19c83e..c86d1d0746ee 100644
--- a/tests/run_gpu_tests.sh
+++ b/tests/run_gpu_tests.sh
@@ -2,26 +2,26 @@
 source "$(dirname "$0")/common_test_functionality.sh"
 set -xeu
 
-skip_distrib_tests=${SKIP_DISTRIB_TESTS:-1}
+# https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
+skip_distrib_tests=${SKIP_DISTRIB_TESTS:-0}
 use_last_failed=${USE_LAST_FAILED:-0}
 ngpus=${1:-1}
 
 match_tests_expression=${2:-""}
 if [ -z "$match_tests_expression" ]; then
-    cuda_pattern="cuda"
+    cuda_pattern="cuda or nccl or gloo"
 else
-    cuda_pattern="cuda and $match_tests_expression"
+    cuda_pattern="(cuda or nccl or gloo) and $match_tests_expression"
 fi
 
 run_tests \
-    --core_args "-vvv tests/ignite" \
+    --core_args "-vvv tests/ignite -m 'not distributed'" \
     --cache_dir ".gpu-cuda" \
     --skip_distrib_tests "${skip_distrib_tests}" \
     --use_coverage 1 \
     --match_tests_expression "${cuda_pattern}" \
     --use_last_failed ${use_last_failed}
 
-# https://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02
 if [ "${skip_distrib_tests}" -eq "1" ]; then
     exit 0
 fi

From 4f462109858291f3499e8fab809d91e69a9d9532 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Tue, 3 Dec 2024 13:39:54 +0100
Subject: [PATCH 05/10] Updated GpuInfo metric, pynvml<12 (#3311)

---
 ignite/contrib/engines/common.py | 2 +-
 ignite/metrics/gpu_info.py       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ignite/contrib/engines/common.py b/ignite/contrib/engines/common.py
index 09f769a18d0f..bcfa54be55ea 100644
--- a/ignite/contrib/engines/common.py
+++ b/ignite/contrib/engines/common.py
@@ -78,7 +78,7 @@ def setup_common_training_handlers(
         lr_scheduler: learning rate scheduler
             as native torch LRScheduler or ignite's parameter scheduler.
         with_gpu_stats: if True, :class:`~ignite.metrics.GpuInfo` is attached to the
-            trainer. This requires `pynvml` package to be installed.
+            trainer. This requires `pynvml<12` package to be installed.
         output_names: list of names associated with `update_function` output dictionary.
         with_pbars: if True, two progress bars on epochs and optionally on iterations are attached.
             Default, True.
diff --git a/ignite/metrics/gpu_info.py b/ignite/metrics/gpu_info.py
index 96ed4f07c57c..d13bbd8a1dae 100644
--- a/ignite/metrics/gpu_info.py
+++ b/ignite/metrics/gpu_info.py
@@ -10,7 +10,7 @@
 
 class GpuInfo(Metric):
     """Provides GPU information: a) used memory percentage, b) gpu utilization percentage values as Metric
-    on each iterations.
+    on each iteration. This metric requires `pynvml <https://pypi.org/project/pynvml/>`_ package of version `<12`.
 
     .. Note ::
 
@@ -39,7 +39,7 @@ def __init__(self) -> None:
         except ImportError:
             raise ModuleNotFoundError(
                 "This contrib module requires pynvml to be installed. "
-                "Please install it with command: \n pip install pynvml"
+                "Please install it with command: \n pip install 'pynvml<12'"
             )
             # Let's check available devices
         if not torch.cuda.is_available():

From 6f8ad2a16b2d82fd6b2b83b849b86160fe8a8b6a Mon Sep 17 00:00:00 2001
From: Fabio Bonassi <bonassi.fabio.94@gmail.com>
Date: Tue, 3 Dec 2024 17:28:22 +0100
Subject: [PATCH 06/10] =?UTF-8?q?Give=20the=20option=20to=20terminate=20th?=
 =?UTF-8?q?e=20engine=20without=20firing=20Events.COMPLET=E2=80=A6=20(#330?=
 =?UTF-8?q?9)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Give the option to terminate the engine without firing Events.COMPLETED. The default behaviour is not changed.

Note that even though Events.COMPLETED is not fired, its timer is updated.

* Update ignite/engine/engine.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Update ignite/engine/engine.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Update ignite/engine/engine.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Update ignite/engine/engine.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Update ignite/engine/events.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Argument `skip_event_completed` renamed to `skip_completed`

* - Fixed docs broken links.
- Do not update self.state.times[Events.COMPLETED.name]  if terminated
- Fixed unit test

* Update ignite/engine/engine.py

Co-authored-by: vfdev <vfdev.5@gmail.com>

* Refactoring and patching.

- Engine time logging moved out of the if clause. In the log message "completed" has been replaced with "finished" to avoid confusion.
- Same changes applied to the method `_internal_run_legacy()`

* Restored .gitignore

Sorry for accidentally including it into the previous commit!

* Update ignite/engine/events.py

* Fixed typo in test_engine.py

* Parametrized test for engine.terminate(skip_completed)

* Update event table

* Fixed documentation

---------

Co-authored-by: vfdev <vfdev.5@gmail.com>
---
 ignite/engine/engine.py                     | 41 +++++++++++++------
 ignite/engine/events.py                     | 25 ++++++++----
 tests/ignite/contrib/engines/test_common.py |  1 -
 tests/ignite/engine/test_engine.py          | 45 ++++++++++++++-------
 4 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/ignite/engine/engine.py b/ignite/engine/engine.py
index 27a949cacca2..e2a148986075 100644
--- a/ignite/engine/engine.py
+++ b/ignite/engine/engine.py
@@ -140,6 +140,7 @@ def __init__(self, process_function: Callable[["Engine", Any], Any]):
         self._process_function = process_function
         self.last_event_name: Optional[Events] = None
         self.should_terminate = False
+        self.skip_completed_after_termination = False
         self.should_terminate_single_epoch = False
         self.should_interrupt = False
         self.state = State()
@@ -538,7 +539,7 @@ def call_interrupt():
         self.logger.info("interrupt signaled. Engine will interrupt the run after current iteration is finished.")
         self.should_interrupt = True
 
-    def terminate(self) -> None:
+    def terminate(self, skip_completed: bool = False) -> None:
         """Sends terminate signal to the engine, so that it terminates completely the run. The run is
         terminated after the event on which ``terminate`` method was called. The following events are triggered:
 
@@ -547,6 +548,9 @@ def terminate(self) -> None:
         - :attr:`~ignite.engine.events.Events.TERMINATE`
         - :attr:`~ignite.engine.events.Events.COMPLETED`
 
+        Args:
+            skip_completed: if True, the event :attr:`~ignite.engine.events.Events.COMPLETED` is not fired after
+                :attr:`~ignite.engine.events.Events.TERMINATE`. Default is False.
 
         Examples:
             .. testcode::
@@ -617,9 +621,12 @@ def terminate():
         .. versionchanged:: 0.4.10
             Behaviour changed, for details see https://github.com/pytorch/ignite/issues/2669
 
+        .. versionchanged:: 0.5.2
+            Added `skip_completed` flag
         """
         self.logger.info("Terminate signaled. Engine will stop after current iteration is finished.")
         self.should_terminate = True
+        self.skip_completed_after_termination = skip_completed
 
     def terminate_epoch(self) -> None:
         """Sends terminate signal to the engine, so that it terminates the current epoch. The run
@@ -993,13 +1000,17 @@ def _internal_run_as_gen(self) -> Generator[Any, None, State]:
             time_taken = time.time() - start_time
             # time is available for handlers but must be updated after fire
             self.state.times[Events.COMPLETED.name] = time_taken
-            handlers_start_time = time.time()
-            self._fire_event(Events.COMPLETED)
-            time_taken += time.time() - handlers_start_time
-            # update time wrt handlers
-            self.state.times[Events.COMPLETED.name] = time_taken
+
+            # do not fire Events.COMPLETED if we terminated the run with flag `skip_completed=True`
+            if not (self.should_terminate and self.skip_completed_after_termination):
+                handlers_start_time = time.time()
+                self._fire_event(Events.COMPLETED)
+                time_taken += time.time() - handlers_start_time
+                # update time wrt handlers
+                self.state.times[Events.COMPLETED.name] = time_taken
+
             hours, mins, secs = _to_hours_mins_secs(time_taken)
-            self.logger.info(f"Engine run complete. Time taken: {hours:02d}:{mins:02d}:{secs:06.3f}")
+            self.logger.info(f"Engine run finished. Time taken: {hours:02d}:{mins:02d}:{secs:06.3f}")
 
         except BaseException as e:
             self._dataloader_iter = None
@@ -1174,13 +1185,17 @@ def _internal_run_legacy(self) -> State:
             time_taken = time.time() - start_time
             # time is available for handlers but must be updated after fire
             self.state.times[Events.COMPLETED.name] = time_taken
-            handlers_start_time = time.time()
-            self._fire_event(Events.COMPLETED)
-            time_taken += time.time() - handlers_start_time
-            # update time wrt handlers
-            self.state.times[Events.COMPLETED.name] = time_taken
+
+            # do not fire Events.COMPLETED if we terminated the run with flag `skip_completed=True`
+            if not (self.should_terminate and self.skip_completed_after_termination):
+                handlers_start_time = time.time()
+                self._fire_event(Events.COMPLETED)
+                time_taken += time.time() - handlers_start_time
+                # update time wrt handlers
+                self.state.times[Events.COMPLETED.name] = time_taken
+
             hours, mins, secs = _to_hours_mins_secs(time_taken)
-            self.logger.info(f"Engine run complete. Time taken: {hours:02d}:{mins:02d}:{secs:06.3f}")
+            self.logger.info(f"Engine run finished. Time taken: {hours:02d}:{mins:02d}:{secs:06.3f}")
 
         except BaseException as e:
             self._dataloader_iter = None
diff --git a/ignite/engine/events.py b/ignite/engine/events.py
index 9dd99348492b..87622d3415cc 100644
--- a/ignite/engine/events.py
+++ b/ignite/engine/events.py
@@ -259,36 +259,47 @@ class Events(EventEnum):
     - TERMINATE_SINGLE_EPOCH : triggered when the run is about to end the current epoch,
       after receiving a :meth:`~ignite.engine.engine.Engine.terminate_epoch()` or
       :meth:`~ignite.engine.engine.Engine.terminate()` call.
+    - EPOCH_COMPLETED : triggered when the epoch is ended. Note that this is triggered even
+      when :meth:`~ignite.engine.engine.Engine.terminate_epoch()` is called.
 
     - TERMINATE : triggered when the run is about to end completely,
       after receiving :meth:`~ignite.engine.engine.Engine.terminate()` call.
 
-    - EPOCH_COMPLETED : triggered when the epoch is ended. Note that this is triggered even
-      when :meth:`~ignite.engine.engine.Engine.terminate_epoch()` is called.
-    - COMPLETED : triggered when engine's run is completed
+    - COMPLETED : triggered when engine's run is completed or terminated with
+      :meth:`~ignite.engine.engine.Engine.terminate()`, unless the flag
+      `skip_completed` is set to True.
 
     The table below illustrates which events are triggered when various termination methods are called.
 
     .. list-table::
-       :widths: 24 25 33 18
+       :widths: 35 38 28 20 20
        :header-rows: 1
 
        * - Method
-         - EVENT_COMPLETED
          - TERMINATE_SINGLE_EPOCH
+         - EPOCH_COMPLETED
          - TERMINATE
+         - COMPLETED
        * - no termination
-         - ✔
          - ✗
+         - ✔
          - ✗
+         - ✔
        * - :meth:`~ignite.engine.engine.Engine.terminate_epoch()`
          - ✔
          - ✔
          - ✗
+         - ✔
        * - :meth:`~ignite.engine.engine.Engine.terminate()`
          - ✗
          - ✔
          - ✔
+         - ✔
+       * - :meth:`~ignite.engine.engine.Engine.terminate()` with `skip_completed=True`
+         - ✗
+         - ✔
+         - ✔
+         - ✗
 
     Since v0.3.0, Events become more flexible and allow to pass an event filter to the Engine:
 
@@ -357,7 +368,7 @@ class CustomEvents(EventEnum):
     STARTED = "started"
     """triggered when engine's run is started."""
     COMPLETED = "completed"
-    """triggered when engine's run is completed"""
+    """triggered when engine's run is completed, or after receiving terminate() call."""
 
     ITERATION_STARTED = "iteration_started"
     """triggered when an iteration is started."""
diff --git a/tests/ignite/contrib/engines/test_common.py b/tests/ignite/contrib/engines/test_common.py
index d0100be9e8da..e14042e62c15 100644
--- a/tests/ignite/contrib/engines/test_common.py
+++ b/tests/ignite/contrib/engines/test_common.py
@@ -8,7 +8,6 @@
 from torch.utils.data.distributed import DistributedSampler
 
 import ignite.distributed as idist
-
 import ignite.handlers as handlers
 from ignite.contrib.engines.common import (
     _setup_logging,
diff --git a/tests/ignite/engine/test_engine.py b/tests/ignite/engine/test_engine.py
index 130212426504..fcb0299aa22d 100644
--- a/tests/ignite/engine/test_engine.py
+++ b/tests/ignite/engine/test_engine.py
@@ -40,11 +40,14 @@ class TestEngine:
     def set_interrupt_resume_enabled(self, interrupt_resume_enabled):
         Engine.interrupt_resume_enabled = interrupt_resume_enabled
 
-    def test_terminate(self):
+    @pytest.mark.parametrize("skip_completed", [True, False])
+    def test_terminate(self, skip_completed):
         engine = Engine(lambda e, b: 1)
         assert not engine.should_terminate
-        engine.terminate()
+        assert not engine.skip_completed_after_termination
+        engine.terminate(skip_completed)
         assert engine.should_terminate
+        assert engine.skip_completed_after_termination == skip_completed
 
     def test_invalid_process_raises_with_invalid_signature(self):
         with pytest.raises(ValueError, match=r"Engine must be given a processing function in order to run"):
@@ -236,25 +239,32 @@ def check_iter_and_data():
         assert num_calls_check_iter_epoch == 1
 
     @pytest.mark.parametrize(
-        "terminate_event, e, i",
+        "terminate_event, e, i, skip_completed",
         [
-            (Events.STARTED, 0, 0),
-            (Events.EPOCH_STARTED(once=2), 2, None),
-            (Events.EPOCH_COMPLETED(once=2), 2, None),
-            (Events.GET_BATCH_STARTED(once=12), None, 12),
-            (Events.GET_BATCH_COMPLETED(once=12), None, 12),
-            (Events.ITERATION_STARTED(once=14), None, 14),
-            (Events.ITERATION_COMPLETED(once=14), None, 14),
+            (Events.STARTED, 0, 0, True),
+            (Events.EPOCH_STARTED(once=2), 2, None, True),
+            (Events.EPOCH_COMPLETED(once=2), 2, None, True),
+            (Events.GET_BATCH_STARTED(once=12), None, 12, True),
+            (Events.GET_BATCH_COMPLETED(once=12), None, 12, False),
+            (Events.ITERATION_STARTED(once=14), None, 14, True),
+            (Events.ITERATION_COMPLETED(once=14), None, 14, True),
+            (Events.STARTED, 0, 0, False),
+            (Events.EPOCH_STARTED(once=2), 2, None, False),
+            (Events.EPOCH_COMPLETED(once=2), 2, None, False),
+            (Events.GET_BATCH_STARTED(once=12), None, 12, False),
+            (Events.GET_BATCH_COMPLETED(once=12), None, 12, False),
+            (Events.ITERATION_STARTED(once=14), None, 14, False),
+            (Events.ITERATION_COMPLETED(once=14), None, 14, False),
         ],
     )
-    def test_terminate_events_sequence(self, terminate_event, e, i):
+    def test_terminate_events_sequence(self, terminate_event, e, i, skip_completed):
         engine = RecordedEngine(MagicMock(return_value=1))
         data = range(10)
         max_epochs = 5
 
         @engine.on(terminate_event)
         def call_terminate():
-            engine.terminate()
+            engine.terminate(skip_completed)
 
         @engine.on(Events.EXCEPTION_RAISED)
         def assert_no_exceptions(ee):
@@ -271,10 +281,15 @@ def assert_no_exceptions(ee):
         if e is None:
             e = i // len(data) + 1
 
+        if skip_completed:
+            assert engine.called_events[-1] == (e, i, Events.TERMINATE)
+            assert engine.called_events[-2] == (e, i, terminate_event)
+        else:
+            assert engine.called_events[-1] == (e, i, Events.COMPLETED)
+            assert engine.called_events[-2] == (e, i, Events.TERMINATE)
+            assert engine.called_events[-3] == (e, i, terminate_event)
+
         assert engine.called_events[0] == (0, 0, Events.STARTED)
-        assert engine.called_events[-1] == (e, i, Events.COMPLETED)
-        assert engine.called_events[-2] == (e, i, Events.TERMINATE)
-        assert engine.called_events[-3] == (e, i, terminate_event)
         assert engine._dataloader_iter is None
 
     @pytest.mark.parametrize("data, epoch_length", [(None, 10), (range(10), None)])

From b636374108e425f86c8e050528d8fd240694806e Mon Sep 17 00:00:00 2001
From: Fabio Bonassi <bonassi.fabio.94@gmail.com>
Date: Mon, 9 Dec 2024 14:20:29 +0100
Subject: [PATCH 07/10] Allow to terminate an epoch without firing
 `Events.EPOCH_COMPLETED` (#3313)

* Added optional flag skip_epoch_completed to Engine.terminate_epoch()

* Improved docs for terminate() and terminate_epoch()

* Make the internal attribute skip_completed_after_termination private

* - Merged flags "should_terminate" and "_skip_completed_after_termination".
- Merged flags "should_terminate_single_epoch" and "_skip_epoch_completed_after_termination".

* Union[bool, str] instead of the pipe operator for compatibility with older Python versions

* Raise a RuntimeError when terminate_epoch() is called on Events.STARTED or Events.EPOCH_STARTED

* Ignoring comparison-overlap warning from mypy to keep the code simple

* Apply suggestions from code review

* Update engine.py

---------

Co-authored-by: vfdev <vfdev.5@gmail.com>
---
 ignite/engine/engine.py            | 79 +++++++++++++++++++++---------
 ignite/engine/events.py            | 12 +++--
 tests/ignite/engine/test_engine.py | 71 ++++++++++++++++++++-------
 3 files changed, 119 insertions(+), 43 deletions(-)

diff --git a/ignite/engine/engine.py b/ignite/engine/engine.py
index e2a148986075..f3f95c9a2e27 100644
--- a/ignite/engine/engine.py
+++ b/ignite/engine/engine.py
@@ -139,9 +139,12 @@ def __init__(self, process_function: Callable[["Engine", Any], Any]):
         self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
         self._process_function = process_function
         self.last_event_name: Optional[Events] = None
-        self.should_terminate = False
-        self.skip_completed_after_termination = False
-        self.should_terminate_single_epoch = False
+        # should_terminate flag: False - don't terminate, True - terminate,
+        # "skip_completed" - terminate and skip the event "COMPLETED"
+        self.should_terminate: Union[bool, str] = False
+        # should_terminate_single_epoch flag: False - don't terminate, True - terminate,
+        # "skip_epoch_completed" - terminate and skip the event "EPOCH_COMPLETED"
+        self.should_terminate_single_epoch: Union[bool, str] = False
         self.should_interrupt = False
         self.state = State()
         self._state_dict_user_keys: List[str] = []
@@ -546,7 +549,7 @@ def terminate(self, skip_completed: bool = False) -> None:
         - ...
         - Terminating event
         - :attr:`~ignite.engine.events.Events.TERMINATE`
-        - :attr:`~ignite.engine.events.Events.COMPLETED`
+        - :attr:`~ignite.engine.events.Events.COMPLETED` (unless `skip_completed=True`)
 
         Args:
             skip_completed: if True, the event :attr:`~ignite.engine.events.Events.COMPLETED` is not fired after
@@ -625,25 +628,31 @@ def terminate():
             Added `skip_completed` flag
         """
         self.logger.info("Terminate signaled. Engine will stop after current iteration is finished.")
-        self.should_terminate = True
-        self.skip_completed_after_termination = skip_completed
+        self.should_terminate = "skip_completed" if skip_completed else True
 
-    def terminate_epoch(self) -> None:
+    def terminate_epoch(self, skip_epoch_completed: bool = False) -> None:
         """Sends terminate signal to the engine, so that it terminates the current epoch. The run
         continues from the next epoch. The following events are triggered:
 
         - ...
         - Event on which ``terminate_epoch`` method is called
         - :attr:`~ignite.engine.events.Events.TERMINATE_SINGLE_EPOCH`
-        - :attr:`~ignite.engine.events.Events.EPOCH_COMPLETED`
+        - :attr:`~ignite.engine.events.Events.EPOCH_COMPLETED` (unless `skip_epoch_completed=True`)
         - :attr:`~ignite.engine.events.Events.EPOCH_STARTED`
         - ...
+
+        Args:
+            skip_epoch_completed: if True, the event :attr:`~ignite.engine.events.Events.EPOCH_COMPLETED`
+                is not fired after :attr:`~ignite.engine.events.Events.TERMINATE_SINGLE_EPOCH`. Default is False.
+
+        .. versionchanged:: 0.5.2
+            Added `skip_epoch_completed` flag
         """
         self.logger.info(
             "Terminate current epoch is signaled. "
             "Current epoch iteration will stop after current iteration is finished."
         )
-        self.should_terminate_single_epoch = True
+        self.should_terminate_single_epoch = "skip_epoch_completed" if skip_epoch_completed else True
 
     def _handle_exception(self, e: BaseException) -> None:
         if Events.EXCEPTION_RAISED in self._event_handlers:
@@ -982,11 +991,17 @@ def _internal_run_as_gen(self) -> Generator[Any, None, State]:
                     # time is available for handlers but must be updated after fire
                     self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
 
-                    handlers_start_time = time.time()
-                    self._fire_event(Events.EPOCH_COMPLETED)
-                    epoch_time_taken += time.time() - handlers_start_time
-                    # update time wrt handlers
-                    self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
+                    if self.should_terminate_single_epoch != "skip_epoch_completed":  # type: ignore[comparison-overlap]
+                        handlers_start_time = time.time()
+                        self._fire_event(Events.EPOCH_COMPLETED)
+                        epoch_time_taken += time.time() - handlers_start_time
+                        # update time wrt handlers
+                        self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
+
+                    if self.should_terminate_single_epoch:
+                        # We skip raising _EngineTerminateSingleEpochException exception on Events.EPOCH_COMPLETED
+                        # as epoch is already completed and nothing to terminate
+                        self.should_terminate_single_epoch = False
                     yield from self._maybe_terminate_or_interrupt()
 
                     hours, mins, secs = _to_hours_mins_secs(epoch_time_taken)
@@ -997,12 +1012,19 @@ def _internal_run_as_gen(self) -> Generator[Any, None, State]:
             except _EngineTerminateException:
                 self._fire_event(Events.TERMINATE)
 
+            except _EngineTerminateSingleEpochException:
+                raise RuntimeError(
+                    "The method terminate_epoch() should not be called on Events.STARTED or Events.EPOCH_STARTED. "
+                    "If this is a desired behaviour, please open a feature request on "
+                    "https://github.com/pytorch/ignite/issues/new/choose"
+                )
+
             time_taken = time.time() - start_time
             # time is available for handlers but must be updated after fire
             self.state.times[Events.COMPLETED.name] = time_taken
 
             # do not fire Events.COMPLETED if we terminated the run with flag `skip_completed=True`
-            if not (self.should_terminate and self.skip_completed_after_termination):
+            if self.should_terminate != "skip_completed":  # type: ignore[comparison-overlap]
                 handlers_start_time = time.time()
                 self._fire_event(Events.COMPLETED)
                 time_taken += time.time() - handlers_start_time
@@ -1121,7 +1143,6 @@ def _run_once_on_dataset_as_gen(self) -> Generator[State, None, float]:
 
         except _EngineTerminateSingleEpochException:
             self._fire_event(Events.TERMINATE_SINGLE_EPOCH, iter_counter=iter_counter)
-            self.should_terminate_single_epoch = False
             self._setup_dataloader_iter()
 
         except _EngineTerminateException as e:
@@ -1167,11 +1188,17 @@ def _internal_run_legacy(self) -> State:
                     # time is available for handlers but must be updated after fire
                     self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
 
-                    handlers_start_time = time.time()
-                    self._fire_event(Events.EPOCH_COMPLETED)
-                    epoch_time_taken += time.time() - handlers_start_time
-                    # update time wrt handlers
-                    self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
+                    if self.should_terminate_single_epoch != "skip_epoch_completed":  # type: ignore[comparison-overlap]
+                        handlers_start_time = time.time()
+                        self._fire_event(Events.EPOCH_COMPLETED)
+                        epoch_time_taken += time.time() - handlers_start_time
+                        # update time wrt handlers
+                        self.state.times[Events.EPOCH_COMPLETED.name] = epoch_time_taken
+
+                    if self.should_terminate_single_epoch:
+                        # We skip raising _EngineTerminateSingleEpochException exception on Events.EPOCH_COMPLETED
+                        # as epoch is already completed and nothing to terminate
+                        self.should_terminate_single_epoch = False
                     self._maybe_terminate_legacy()
 
                     hours, mins, secs = _to_hours_mins_secs(epoch_time_taken)
@@ -1182,12 +1209,19 @@ def _internal_run_legacy(self) -> State:
             except _EngineTerminateException:
                 self._fire_event(Events.TERMINATE)
 
+            except _EngineTerminateSingleEpochException:
+                raise RuntimeError(
+                    "The method terminate_epoch() should not be called on Events.STARTED or Events.EPOCH_STARTED. "
+                    "If this is a desired behaviour, please open a feature request on "
+                    "https://github.com/pytorch/ignite/issues/new/choose"
+                )
+
             time_taken = time.time() - start_time
             # time is available for handlers but must be updated after fire
             self.state.times[Events.COMPLETED.name] = time_taken
 
             # do not fire Events.COMPLETED if we terminated the run with flag `skip_completed=True`
-            if not (self.should_terminate and self.skip_completed_after_termination):
+            if self.should_terminate != "skip_completed":  # type: ignore[comparison-overlap]
                 handlers_start_time = time.time()
                 self._fire_event(Events.COMPLETED)
                 time_taken += time.time() - handlers_start_time
@@ -1292,7 +1326,6 @@ def _run_once_on_dataset_legacy(self) -> float:
 
         except _EngineTerminateSingleEpochException:
             self._fire_event(Events.TERMINATE_SINGLE_EPOCH, iter_counter=iter_counter)
-            self.should_terminate_single_epoch = False
             self._setup_dataloader_iter()
 
         except _EngineTerminateException as e:
diff --git a/ignite/engine/events.py b/ignite/engine/events.py
index 87622d3415cc..7a348f947624 100644
--- a/ignite/engine/events.py
+++ b/ignite/engine/events.py
@@ -259,8 +259,9 @@ class Events(EventEnum):
     - TERMINATE_SINGLE_EPOCH : triggered when the run is about to end the current epoch,
       after receiving a :meth:`~ignite.engine.engine.Engine.terminate_epoch()` or
       :meth:`~ignite.engine.engine.Engine.terminate()` call.
-    - EPOCH_COMPLETED : triggered when the epoch is ended. Note that this is triggered even
-      when :meth:`~ignite.engine.engine.Engine.terminate_epoch()` is called.
+    - EPOCH_COMPLETED : triggered when the epoch is ended. This is triggered even
+      when :meth:`~ignite.engine.engine.Engine.terminate_epoch()` is called,
+      unless the flag `skip_epoch_completed` is set to True.
 
     - TERMINATE : triggered when the run is about to end completely,
       after receiving :meth:`~ignite.engine.engine.Engine.terminate()` call.
@@ -272,7 +273,7 @@ class Events(EventEnum):
     The table below illustrates which events are triggered when various termination methods are called.
 
     .. list-table::
-       :widths: 35 38 28 20 20
+       :widths: 38 38 28 20 20
        :header-rows: 1
 
        * - Method
@@ -290,6 +291,11 @@ class Events(EventEnum):
          - ✔
          - ✗
          - ✔
+       * - :meth:`~ignite.engine.engine.Engine.terminate_epoch()` with `skip_epoch_completed=True`
+         - ✔
+         - ✗
+         - ✗
+         - ✔
        * - :meth:`~ignite.engine.engine.Engine.terminate()`
          - ✗
          - ✔
diff --git a/tests/ignite/engine/test_engine.py b/tests/ignite/engine/test_engine.py
index fcb0299aa22d..76e1ad837605 100644
--- a/tests/ignite/engine/test_engine.py
+++ b/tests/ignite/engine/test_engine.py
@@ -44,10 +44,13 @@ def set_interrupt_resume_enabled(self, interrupt_resume_enabled):
     def test_terminate(self, skip_completed):
         engine = Engine(lambda e, b: 1)
         assert not engine.should_terminate
-        assert not engine.skip_completed_after_termination
+
         engine.terminate(skip_completed)
-        assert engine.should_terminate
-        assert engine.skip_completed_after_termination == skip_completed
+
+        if skip_completed:
+            assert engine.should_terminate == "skip_completed"
+        else:
+            assert engine.should_terminate == True  # noqa: E712
 
     def test_invalid_process_raises_with_invalid_signature(self):
         with pytest.raises(ValueError, match=r"Engine must be given a processing function in order to run"):
@@ -292,8 +295,11 @@ def assert_no_exceptions(ee):
         assert engine.called_events[0] == (0, 0, Events.STARTED)
         assert engine._dataloader_iter is None
 
-    @pytest.mark.parametrize("data, epoch_length", [(None, 10), (range(10), None)])
-    def test_terminate_epoch_stops_mid_epoch(self, data, epoch_length):
+    @pytest.mark.parametrize(
+        "data, epoch_length, skip_epoch_completed",
+        [(None, 10, False), (range(10), None, False), (None, 10, True), (range(10), None, True)],
+    )
+    def test_terminate_epoch_stops_mid_epoch(self, data, epoch_length, skip_epoch_completed):
         real_epoch_length = epoch_length if data is None else len(data)
         iteration_to_stop = real_epoch_length + 4
 
@@ -301,7 +307,7 @@ def test_terminate_epoch_stops_mid_epoch(self, data, epoch_length):
 
         def start_of_iteration_handler(engine):
             if engine.state.iteration == iteration_to_stop:
-                engine.terminate_epoch()
+                engine.terminate_epoch(skip_epoch_completed)
 
         max_epochs = 3
         engine.add_event_handler(Events.ITERATION_STARTED, start_of_iteration_handler)
@@ -312,15 +318,23 @@ def start_of_iteration_handler(engine):
         assert state.epoch == max_epochs
 
     @pytest.mark.parametrize(
-        "terminate_epoch_event, i",
+        "terminate_epoch_event, i, skip_epoch_completed",
         [
-            (Events.GET_BATCH_STARTED(once=12), 12),
-            (Events.GET_BATCH_COMPLETED(once=12), 12),
-            (Events.ITERATION_STARTED(once=14), 14),
-            (Events.ITERATION_COMPLETED(once=14), 14),
+            (Events.GET_BATCH_STARTED(once=12), 12, False),
+            (Events.GET_BATCH_COMPLETED(once=12), 12, False),
+            (Events.ITERATION_STARTED(once=14), 14, False),
+            (Events.ITERATION_COMPLETED(once=14), 14, False),
+            (Events.GET_BATCH_STARTED(once=12), 12, True),
+            (Events.GET_BATCH_COMPLETED(once=12), 12, True),
+            (Events.ITERATION_STARTED(once=14), 14, True),
+            (Events.ITERATION_COMPLETED(once=14), 14, True),
+            (Events.STARTED, 30, False),
+            (Events.STARTED, 30, True),
+            (Events.EPOCH_STARTED(once=2), 10, False),
+            (Events.EPOCH_STARTED(once=2), 10, True),
         ],
     )
-    def test_terminate_epoch_events_sequence(self, terminate_epoch_event, i):
+    def test_terminate_epoch_events_sequence(self, terminate_epoch_event, i, skip_epoch_completed):
         engine = RecordedEngine(MagicMock(return_value=1))
         data = range(10)
         max_epochs = 3
@@ -331,31 +345,54 @@ def test_terminate_epoch_events_sequence(self, terminate_epoch_event, i):
 
         @engine.on(terminate_epoch_event)
         def call_terminate_epoch():
+            assert not engine.should_terminate_single_epoch
             nonlocal call_count
             if call_count < 1:
-                engine.terminate_epoch()
+                engine.terminate_epoch(skip_epoch_completed)
+                if skip_epoch_completed:
+                    assert engine.should_terminate_single_epoch == "skip_epoch_completed"
+                else:
+                    assert engine.should_terminate_single_epoch == True  # noqa: E712
+
             call_count += 1
 
+        @engine.on(Events.EPOCH_STARTED)
+        def check_skip_reset():
+            if terminate_epoch_event != Events.EPOCH_STARTED:
+                assert engine.should_terminate_single_epoch == False  # noqa: E712
+
         @engine.on(Events.TERMINATE_SINGLE_EPOCH)
         def check_previous_events(iter_counter):
             e = i // len(data) + 1
-
             assert engine.called_events[0] == (0, 0, Events.STARTED)
             assert engine.called_events[-2] == (e, i, terminate_epoch_event)
             assert engine.called_events[-1] == (e, i, Events.TERMINATE_SINGLE_EPOCH)
+            if skip_epoch_completed:
+                assert engine.should_terminate_single_epoch == "skip_epoch_completed"
+            else:
+                assert engine.should_terminate_single_epoch == True  # noqa: E712
 
         @engine.on(Events.EPOCH_COMPLETED)
         def check_previous_events2():
             e = i // len(data) + 1
             if e == engine.state.epoch and i == engine.state.iteration:
+                assert not skip_epoch_completed
+                assert isinstance(engine.should_terminate_single_epoch, bool)
                 assert engine.called_events[-3] == (e, i, terminate_epoch_event)
                 assert engine.called_events[-2] == (e, i, Events.TERMINATE_SINGLE_EPOCH)
                 assert engine.called_events[-1] == (e, i, Events.EPOCH_COMPLETED)
 
-        engine.run(data, max_epochs=max_epochs)
+        if terminate_epoch_event in [Events.STARTED, Events.EPOCH_STARTED]:
+            with pytest.raises(RuntimeError):
+                engine.run(data, max_epochs=max_epochs)
+        else:
+            engine.run(data, max_epochs=max_epochs)
+
+            assert engine.state.epoch == max_epochs
+            assert (max_epochs - 1) * len(data) < engine.state.iteration < max_epochs * len(data)
 
-        assert engine.state.epoch == max_epochs
-        assert (max_epochs - 1) * len(data) < engine.state.iteration < max_epochs * len(data)
+            epoch_completed_events = [e for e in engine.called_events if e[2] == Events.EPOCH_COMPLETED.name]
+            assert len(epoch_completed_events) == max_epochs - skip_epoch_completed
 
     @pytest.mark.parametrize("data", [None, "mock_data_loader"])
     def test_iteration_events_are_fired(self, data):

From a3d691c3c2badce2a8e5d540538af2c7fed7811d Mon Sep 17 00:00:00 2001
From: Tudor Gulin <gulintudor@gmail.com>
Date: Mon, 9 Dec 2024 15:58:35 +0200
Subject: [PATCH 08/10] Fix deprecated statement (#3307)

* fix-deprecated-warning

Replaced torch.cuda.amp.autocast with torch.amp.autocast("cuda",...).

* autopep8 fix

* Update torch version to 1.12.0

* Address PR comments

* Revert unwanted changes

* Fix regex

* Revert change in CycleGAN_with_torch_cuda_amp

* Fix regex in test_create_supervised

* Update ignite/engine/__init__.py

* Update tests/ignite/engine/test_create_supervised.py

---------

Co-authored-by: Gulin7 <Gulin7@users.noreply.github.com>
Co-authored-by: vfdev <vfdev.5@gmail.com>
---
 examples/cifar10/main.py                      |  7 +++--
 .../benchmark_torch_cuda_amp.py               |  5 ++--
 examples/cifar10_qat/main.py                  |  5 ++--
 .../CycleGAN_with_torch_cuda_amp.ipynb        |  5 ++--
 .../classification/imagenet/main.py           |  9 +++---
 .../segmentation/pascal_voc2012/main.py       |  9 +++---
 examples/transformers/main.py                 |  7 +++--
 ignite/engine/__init__.py                     | 12 ++++----
 tests/ignite/engine/test_create_supervised.py | 30 +++++++++----------
 9 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py
index b64b81c1d036..b8dbce5d9601 100644
--- a/examples/cifar10/main.py
+++ b/examples/cifar10/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -299,7 +300,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 
@@ -355,7 +356,7 @@ def evaluate_step(engine: Engine, batch):
             x = x.to(device, non_blocking=True)
             y = y.to(device, non_blocking=True)
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(x)
         return output, y
 
diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
index 992f305bf24a..746d7eb54c49 100644
--- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
+++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
@@ -1,6 +1,7 @@
 import fire
 import torch
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 from torch.nn import CrossEntropyLoss
 from torch.optim import SGD
 from torchvision.models import wide_resnet50_2
@@ -34,7 +35,7 @@ def train_step(engine, batch):
         optimizer.zero_grad()
 
         # Runs the forward pass with autocasting.
-        with autocast():
+        with autocast("cuda"):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 
diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py
index f965ce1e6e4d..7b8366a2a63f 100644
--- a/examples/cifar10_qat/main.py
+++ b/examples/cifar10_qat/main.py
@@ -6,7 +6,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -283,7 +284,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 
diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
index 614c8528b8d6..f6271eaf3bda 100644
--- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
+++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
@@ -887,7 +887,7 @@
     "id": "JE8dLeEfIl_Z"
    },
    "source": [
-    "We will use [`torch.cuda.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
+    "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
    ]
   },
   {
@@ -896,7 +896,8 @@
     "id": "vrJls4p-FRcA"
    },
    "source": [
-    "from torch.cuda.amp import autocast, GradScaler\n",
+    "from torch.cuda.amp import GradScaler\n",
+    "from torch.amp import autocast\n",
     "\n",
     "from ignite.utils import convert_tensor\n",
     "import torch.nn.functional as F\n",
diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py
index 85c20c08a62b..defb4ddc1510 100644
--- a/examples/references/classification/imagenet/main.py
+++ b/examples/references/classification/imagenet/main.py
@@ -6,9 +6,10 @@
 import torch
 
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 
 import dataflow as data
 import utils
@@ -144,7 +145,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def training_step(engine, batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -235,7 +236,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py
index 20afebbb7d36..b6fbc7ad494a 100644
--- a/examples/references/segmentation/pascal_voc2012/main.py
+++ b/examples/references/segmentation/pascal_voc2012/main.py
@@ -6,9 +6,10 @@
 import torch
 
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 
 import dataflow as data
 import utils
@@ -191,7 +192,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def forward_pass(batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -272,7 +273,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
diff --git a/examples/transformers/main.py b/examples/transformers/main.py
index cd1a84d2195b..f8118eabf90e 100644
--- a/examples/transformers/main.py
+++ b/examples/transformers/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -309,7 +310,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(input_batch)
             loss = criterion(y_pred, labels)
 
@@ -373,7 +374,7 @@ def evaluate_step(engine, batch):
             input_batch = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in batch[0].items()}
             labels = labels.to(device, non_blocking=True, dtype=torch.float)
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(input_batch)
         return output, labels
 
diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
index cbaac4e16cb7..6e82bc2f6bc7 100644
--- a/ignite/engine/__init__.py
+++ b/ignite/engine/__init__.py
@@ -185,9 +185,9 @@ def supervised_training_step_amp(
     """
 
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
 
     if gradient_accumulation_steps <= 0:
         raise ValueError(
@@ -200,7 +200,7 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
             optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        with autocast(enabled=True):
+        with autocast("cuda", enabled=True):
             output = model_fn(model, x)
             y_pred = model_transform(output)
             loss = loss_fn(y_pred, y)
@@ -726,15 +726,15 @@ def supervised_evaluation_step_amp(
         Added `model_fn` to customize model's application on the sample
     """
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
 
     def evaluate_step(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[torch.Tensor]]:
         model.eval()
         with torch.no_grad():
             x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-            with autocast(enabled=True):
+            with autocast("cuda", enabled=True):
                 output = model_fn(model, x)
                 y_pred = model_transform(output)
             return output_transform(x, y, y_pred)
diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py
index 54938167601a..4f07c95929e0 100644
--- a/tests/ignite/engine/test_create_supervised.py
+++ b/tests/ignite/engine/test_create_supervised.py
@@ -168,7 +168,7 @@ def _():
                 trainer.run(data)
 
 
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 def test_create_supervised_training_scalar_assignment():
     with mock.patch("ignite.engine._check_arg") as check_arg_mock:
         check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False)
@@ -447,21 +447,21 @@ def test_create_supervised_trainer_apex_error():
 def mock_torch_cuda_amp_module():
     with patch.dict(
         "sys.modules",
-        {"torch.cuda.amp": None, "torch.cuda.amp.grad_scaler": None, "torch.cuda.amp.autocast_mode": None},
+        {"torch.amp": None, "torch.cuda.amp": None, "torch.amp.autocast_mode": None},
     ):
         yield torch
 
 
 def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module):
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
         _test_create_supervised_trainer_wrong_accumulation(trainer_device="cpu", amp_mode="amp")
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
         _test_create_supervised_trainer(amp_mode="amp")
     with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use scaler argument."):
         _test_create_supervised_trainer(amp_mode="amp", scaler=True)
 
 
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.5.0"), reason="Skip if < 1.5.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 def test_create_supervised_trainer_scaler_not_amp():
     scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())
 
@@ -501,7 +501,7 @@ def test_create_supervised_trainer_on_mps():
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device)
 
 
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_trainer_on_cuda_amp():
     model_device = trainer_device = "cuda"
@@ -517,7 +517,7 @@ def test_create_supervised_trainer_on_cuda_amp():
     _test_create_mocked_supervised_trainer(model_device=model_device, trainer_device=trainer_device, amp_mode="amp")
 
 
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_trainer_on_cuda_amp_scaler():
     model_device = trainer_device = "cuda"
@@ -630,8 +630,8 @@ def test_create_supervised_evaluator():
     _test_mocked_supervised_evaluator()
 
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step_amp(mock_torch_cuda_amp_module)
 
 
@@ -640,8 +640,8 @@ def test_create_supervised_evaluator_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="cpu")
 
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step(mock_torch_cuda_amp_module, evaluator_device="cpu")
             _test_create_evaluation_step_amp(mock_torch_cuda_amp_module, evaluator_device="cpu")
 
@@ -651,8 +651,8 @@ def test_create_supervised_evaluator_traced_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="cpu", trace=True)
 
     # older versions didn't have the autocast method so we skip the test for older builds
-    if Version(torch.__version__) >= Version("1.6.0"):
-        with mock.patch("torch.cuda.amp.autocast") as mock_torch_cuda_amp_module:
+    if Version(torch.__version__) >= Version("1.12.0"):
+        with mock.patch("torch.amp.autocast") as mock_torch_cuda_amp_module:
             _test_create_evaluation_step(mock_torch_cuda_amp_module, evaluator_device="cpu", trace=True)
 
 
@@ -682,7 +682,7 @@ def test_create_supervised_evaluator_on_mps_with_model_on_cpu():
     _test_mocked_supervised_evaluator(evaluator_device="mps")
 
 
-@pytest.mark.skipif(Version(torch.__version__) < Version("1.6.0"), reason="Skip if < 1.6.0")
+@pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0")
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="Skip if no GPU")
 def test_create_supervised_evaluator_on_cuda_amp():
     model_device = evaluator_device = "cuda"
@@ -691,7 +691,7 @@ def test_create_supervised_evaluator_on_cuda_amp():
 
 
 def test_create_supervised_evaluator_amp_error(mock_torch_cuda_amp_module):
-    with pytest.raises(ImportError, match="Please install torch>=1.6.0 to use amp_mode='amp'."):
+    with pytest.raises(ImportError, match="Please install torch>=1.12.0 to use amp_mode='amp'."):
         _test_create_supervised_evaluator(amp_mode="amp")
 
 

From d2f935d14625f3e8c7ffa38628a1585fe5e76db9 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Mon, 16 Dec 2024 15:58:46 +0100
Subject: [PATCH 09/10] Fixed failing test_roc_auc.py::test_check_compute_fn
 test (#3316)

The test is failing because scikit-learn changed its behavior:
ValueError exception -> UndefinedMetricWarning between 1.5 and 1.6
---
 tests/ignite/metrics/test_roc_auc.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/ignite/metrics/test_roc_auc.py b/tests/ignite/metrics/test_roc_auc.py
index 1e60c480ca18..8695f188b5fb 100644
--- a/tests/ignite/metrics/test_roc_auc.py
+++ b/tests/ignite/metrics/test_roc_auc.py
@@ -4,6 +4,7 @@
 import pytest
 import sklearn
 import torch
+from sklearn.exceptions import UndefinedMetricWarning
 from sklearn.metrics import roc_auc_score
 
 import ignite.distributed as idist
@@ -112,7 +113,7 @@ def test_check_compute_fn():
     em = ROC_AUC(check_compute_fn=True)
 
     em.reset()
-    with pytest.warns(EpochMetricWarning, match=r"Probably, there can be a problem with `compute_fn`"):
+    with pytest.warns((UndefinedMetricWarning, EpochMetricWarning), match=r"Only one class.+present in y_true"):
         em.update(output)
 
     em = ROC_AUC(check_compute_fn=False)

From 06fe8bd914711e9554c823c684b065d01710df82 Mon Sep 17 00:00:00 2001
From: vfdev <vfdev.5@gmail.com>
Date: Tue, 17 Dec 2024 00:32:20 +0100
Subject: [PATCH 10/10] Update pytorch-version-tests.yml (#3315)

* Update pytorch-version-tests.yml

* Update pytorch-version-tests.yml

* Update pytorch-version-tests.yml
---
 .github/workflows/pytorch-version-tests.yml | 23 +++++++++++----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml
index e14d46f27619..1456a045e7e6 100644
--- a/.github/workflows/pytorch-version-tests.yml
+++ b/.github/workflows/pytorch-version-tests.yml
@@ -15,24 +15,25 @@ jobs:
       max-parallel: 5
       fail-fast: false
       matrix:
-        # Here we keep python 3.8 tests until the end of the 2024 and 
-        # will drop python version and related pytorch versions
-        python-version: [3.8, 3.9, "3.10"]
+        python-version: [3.9, "3.10", "3.11"]
         pytorch-version:
-          [2.4.1, 2.3.1, 2.2.2, 2.0.1, 1.13.1, 1.12.1, 1.10.0, 1.8.1]
+          [2.4.1, 2.3.1, 2.2.2, 2.0.1, 1.13.1, 1.12.1, 1.10.0]
         exclude:
-          # disabling python 3.9 support with PyTorch 1.7.1 and 1.8.1, to stop repeated pytorch-version test fail.
-          # https://github.com/pytorch/ignite/issues/2383
-          - pytorch-version: 1.8.1
-            python-version: 3.9
-          - pytorch-version: 1.8.1
-            python-version: "3.10"
-
           - pytorch-version: 1.10.0
             python-version: "3.10"
+          - pytorch-version: 1.10.0
+            python-version: "3.11"
 
           - pytorch-version: 1.11.0
             python-version: "3.10"
+          - pytorch-version: 1.11.0
+            python-version: "3.11"
+          - pytorch-version: 1.12.1
+            python-version: "3.11"
+          # Conda fails to install the cpuonly version, and a few CPU distributed
+          # tests fail with unrelated errors
+          - pytorch-version: 1.13.1
+            python-version: "3.11"
 
     steps:
       - uses: actions/checkout@v4