diff --git a/thunder/tests/distributed/helper.py b/thunder/tests/distributed/helper.py
index 2211531608..bc53fbe520 100644
--- a/thunder/tests/distributed/helper.py
+++ b/thunder/tests/distributed/helper.py
@@ -125,6 +125,7 @@ def forward(self, x):
 
 if torch.distributed.is_available():
     from torch.testing._internal import common_distributed, common_utils
+
     # note(crcrpar): How to write a test with `DDP`
     # Just add a method to :class:`CompileDDPTest`. The class is responsible for
     # - calling `torch.distributed.init_process_group` with NCCL backend
@@ -201,7 +202,6 @@ def _run(cls, rank, test_name, file_name, pipe, *, fake_pg=False):
             torch.distributed.destroy_process_group()
             sys.exit(0)
 
-
     # Configures PyTorch's default process group, must be called at the start of each
     # distributed process
     def init_per_process_distributed(
@@ -225,7 +225,6 @@ def init_per_process_distributed(
         # so we want to pass the ProcessGroup explicitly
         return torch.distributed.distributed_c10d._get_default_group()
 
-
     # Wraps a function so that it becomes one process of several executing the test
     # See test_native_ddp and its helper _test_native_ddp_helper below for an example
    # of how to use this wrapper.
@@ -293,7 +292,6 @@ def error_callback(ex):
 
         return test_fn
 
-
     # Creates a dataloader for a process
     # If sample_seed is specified then the dataloader will load tensors with the same values
     # on each process.
@@ -325,7 +323,6 @@ def to_device(tensors: list[torch.Tensor]) -> list[torch.Tensor]:
 
         return dataloader
 
-
     def run_test_no_sync_grad_accumulation(
         test_case: DistributedParallelTestCase,
         get_model_and_optimizer: Callable[[torch.device], tuple[torch.nn.Module, torch.optim.Optimizer]],
diff --git a/thunder/tests/distributed/test_checkpoint.py b/thunder/tests/distributed/test_checkpoint.py
index c96d8b7cf8..e3bca6c74e 100644
--- a/thunder/tests/distributed/test_checkpoint.py
+++ b/thunder/tests/distributed/test_checkpoint.py
@@ -5,6 +5,7 @@
 
 import pytest
 import torch
+
 if not torch.distributed.is_available():
     pytest.skip(allow_module_level=True)
 from torch.distributed import distributed_c10d as c10d
diff --git a/thunder/tests/distributed/test_ddp.py b/thunder/tests/distributed/test_ddp.py
index e92e951327..861f361062 100644
--- a/thunder/tests/distributed/test_ddp.py
+++ b/thunder/tests/distributed/test_ddp.py
@@ -6,6 +6,7 @@
 import pytest
 import torch
 import torch.distributed as tdist
+
 if not tdist.is_available():
     pytest.skip(allow_module_level=True)
 import torch.nn as nn
diff --git a/thunder/tests/distributed/test_fsdp.py b/thunder/tests/distributed/test_fsdp.py
index eeb6dc8c5f..a5716e8c9b 100644
--- a/thunder/tests/distributed/test_fsdp.py
+++ b/thunder/tests/distributed/test_fsdp.py
@@ -7,6 +7,7 @@
 import pytest
 import torch
 import torch.distributed as tdist
+
 if not tdist.is_available():
     pytest.skip(allow_module_level=True)
 import torch.nn as nn
diff --git a/thunder/tests/distributed/test_ops.py b/thunder/tests/distributed/test_ops.py
index ed246d7300..ae7be62f7b 100644
--- a/thunder/tests/distributed/test_ops.py
+++ b/thunder/tests/distributed/test_ops.py
@@ -4,6 +4,7 @@
 
 import pytest
 import torch
+
 if not torch.distributed.is_available():
     pytest.skip(allow_module_level=True)
 from torch.testing import make_tensor
diff --git a/thunder/tests/distributed/test_tensor_parallel.py b/thunder/tests/distributed/test_tensor_parallel.py
index 2194bebf45..af4389fbcd 100644
--- a/thunder/tests/distributed/test_tensor_parallel.py
+++ b/thunder/tests/distributed/test_tensor_parallel.py
@@ -3,6 +3,7 @@
 import pytest
 import torch
 import torch.nn as nn
+
 if not torch.distributed.is_available():
     pytest.skip(allow_module_level=True)
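
Note on the guard these hunks touch: each test module already skips itself at import time when torch.distributed is unavailable; the changes above only normalize blank lines around that guard and around the helpers in helper.py. Below is a minimal, self-contained sketch of the same module-level skip pattern, for reference only; the test function name is hypothetical and not part of this diff.

# Minimal sketch of the module-level skip guard used by the test files above.
import pytest
import torch
import torch.distributed as tdist

# `allow_module_level=True` lets pytest.skip() be called at import time, so the
# whole module is skipped on PyTorch builds without distributed support instead
# of failing on the later torch.distributed-dependent imports.
if not tdist.is_available():
    pytest.skip(allow_module_level=True)


def test_distributed_package_is_available():
    # Placeholder body; a real test reaches this point only when the guard
    # above did not skip the module.
    assert tdist.is_available()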