diff --git a/.github/scripts/unittest-linux/run_test.sh b/.github/scripts/unittest-linux/run_test.sh index 7740932530..06e77dc6ae 100755 --- a/.github/scripts/unittest-linux/run_test.sh +++ b/.github/scripts/unittest-linux/run_test.sh @@ -27,6 +27,7 @@ fi export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true cd test pytest torchaudio_unittest -k "not torchscript and not fairseq and not demucs ${PYTEST_K_EXTRA}" ) diff --git a/.github/workflows/unittest-linux-cpu.yml b/.github/workflows/unittest-linux-cpu.yml index a695e87a0c..8b84929165 100644 --- a/.github/workflows/unittest-linux-cpu.yml +++ b/.github/workflows/unittest-linux-cpu.yml @@ -92,5 +92,7 @@ jobs: export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_DECODER=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true + pytest test/torchaudio_unittest -k "not torchscript and not fairseq and not demucs" -x echo "::endgroup::" diff --git a/.github/workflows/unittest-linux-gpu.yml b/.github/workflows/unittest-linux-gpu.yml index 10a48e38ff..7f1c84a58b 100644 --- a/.github/workflows/unittest-linux-gpu.yml +++ b/.github/workflows/unittest-linux-gpu.yml @@ -44,6 +44,8 @@ jobs: export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_demucs=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true + export CUBLAS_WORKSPACE_CONFIG=:16:8 # Set UPLOAD_CHANNEL diff --git a/.github/workflows/unittest-macos-cpu.yml b/.github/workflows/unittest-macos-cpu.yml index e6adb85238..d083343d8e 100644 --- a/.github/workflows/unittest-macos-cpu.yml +++ b/.github/workflows/unittest-macos-cpu.yml @@ -89,6 +89,7 @@ jobs: export 
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true pytest test/torchaudio_unittest -k "not torchscript and not fairseq and not demucs and not librosa" -x echo "::endgroup::" diff --git a/.github/workflows/unittest-windows-cpu.yml b/.github/workflows/unittest-windows-cpu.yml index 3b872a4715..4a9c41d81c 100644 --- a/.github/workflows/unittest-windows-cpu.yml +++ b/.github/workflows/unittest-windows-cpu.yml @@ -54,6 +54,7 @@ jobs: export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true .github/scripts/unittest-windows/setup_env.sh .github/scripts/unittest-windows/install.sh diff --git a/.github/workflows/unittest-windows-gpu.yml b/.github/workflows/unittest-windows-gpu.yml index 2b0e66c171..726c5e2137 100644 --- a/.github/workflows/unittest-windows-gpu.yml +++ b/.github/workflows/unittest-windows-gpu.yml @@ -55,6 +55,7 @@ jobs: export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece=true + export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MULTIGPU_CUDA=true .github/scripts/unittest-windows/setup_env.sh .github/scripts/unittest-windows/install.sh diff --git a/src/libtorchaudio/forced_align/gpu/compute.cu b/src/libtorchaudio/forced_align/gpu/compute.cu index cb94bf5dee..6e064c4958 100644 --- a/src/libtorchaudio/forced_align/gpu/compute.cu +++ b/src/libtorchaudio/forced_align/gpu/compute.cu @@ -122,6 +122,7 @@ void forced_align_impl( const int64_t blank, Tensor& paths) { auto device_index = logProbs.get_device_index(); + const torch::stable::accelerator::DeviceGuard 
device_guard(device_index); auto defaultStream = libtorchaudio::cuda::getCurrentCUDAStream(device_index); auto cpuDataTranferStream = libtorchaudio::cuda::getStreamFromPool(false, device_index); const scalar_t kNegInfinity = -std::numeric_limits<scalar_t>::infinity(); diff --git a/test/torchaudio_unittest/common_utils/__init__.py b/test/torchaudio_unittest/common_utils/__init__.py index c464bede69..0fba71e451 100644 --- a/test/torchaudio_unittest/common_utils/__init__.py +++ b/test/torchaudio_unittest/common_utils/__init__.py @@ -18,6 +18,7 @@ skipIfNoQengine, skipIfPy310, skipIfRocm, + skipIfSingleCuda, TempDirMixin, TestBaseMixin, TorchaudioTestCase, @@ -65,6 +66,7 @@ def inject_request(self, request): "skipIfNoFFmpeg", "skipIfNoHWAccel", "skipIfPy310", + "skipIfSingleCuda", "disabledInCI", "get_wav_data", "normalize_wav", diff --git a/test/torchaudio_unittest/common_utils/case_utils.py b/test/torchaudio_unittest/common_utils/case_utils.py index e1af754b04..8aadece898 100644 --- a/test/torchaudio_unittest/common_utils/case_utils.py +++ b/test/torchaudio_unittest/common_utils/case_utils.py @@ -249,6 +249,15 @@ def skipIfNoModule(module, display_name=None): reason="Tests are failing on CI consistently. Disabled while investigating.", key="TEMPORARY_DISABLED", ) +skipIfSingleCuda = _skipIf( + not (torch.cuda.is_available() and torch.cuda.device_count() > 1), + reason=( + "CUDA is not available." + if not torch.cuda.is_available() + else f"Not a multi-GPU platform (device count is {torch.cuda.device_count()})." 
+ ), + key="NO_MULTIGPU_CUDA", +) def skipIfNoHWAccel(name): diff --git a/test/torchaudio_unittest/functional/functional_cuda_test.py b/test/torchaudio_unittest/functional/functional_cuda_test.py index c4d7ab2975..037b053099 100644 --- a/test/torchaudio_unittest/functional/functional_cuda_test.py +++ b/test/torchaudio_unittest/functional/functional_cuda_test.py @@ -1,7 +1,7 @@ import unittest import torch -from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoCuda +from torchaudio_unittest.common_utils import PytorchTestCase, skipIfNoCuda, skipIfSingleCuda from .functional_impl import Functional, FunctionalCUDAOnly @@ -28,6 +28,12 @@ class TestFunctionalCUDAOnlyFloat32(FunctionalCUDAOnly, PytorchTestCase): device = torch.device("cuda") +@skipIfSingleCuda +class TestFunctionalMultiGPUCUDAOnlyFloat32(FunctionalCUDAOnly, PytorchTestCase): + dtype = torch.float32 + device = torch.device("cuda:1") + + @skipIfNoCuda class TestFunctionalCUDAOnlyFloat64(FunctionalCUDAOnly, PytorchTestCase): dtype = torch.float64