diff --git a/thunder/tests/distributed/helper.py b/thunder/tests/distributed/helper.py
index 55a3e3cc51..de76e59e4a 100644
--- a/thunder/tests/distributed/helper.py
+++ b/thunder/tests/distributed/helper.py
@@ -112,6 +112,11 @@ def world_size(self) -> int:
     def init_method(self):
         return f"{common_utils.FILE_SCHEMA}{self.file_name}"
 
+    @property
+    def destroy_pg_upon_exit(self) -> bool:
+        # Overriding base test class: do not auto destroy PG upon exit.
+        return False
+
     @classmethod
     def _run(cls, rank, test_name, file_name, pipe, *, fake_pg=False):
         assert not fake_pg, "Not yet supported here..."
@@ -130,14 +135,10 @@ def _run(cls, rank, test_name, file_name, pipe, *, fake_pg=False):
         local_rank = self.rank % torch.cuda.device_count()
         torch.cuda.set_device(local_rank)
         os.environ["LOCAL_RANK"] = str(local_rank)
-        if "destroy_process_group" in inspect.signature(self.run_test).parameters:
-            run_test_kwargs = {"destroy_process_group": False}
-        else:
-            run_test_kwargs = {}
 
         torch.distributed.barrier()
         try:
-            self.run_test(test_name, pipe, **run_test_kwargs)
+            self.run_test(test_name, pipe)
         except Exception:
             raise
         finally:
diff --git a/thunder/tests/test_dynamo.py b/thunder/tests/test_dynamo.py
index 42299c149c..233b126d94 100644
--- a/thunder/tests/test_dynamo.py
+++ b/thunder/tests/test_dynamo.py
@@ -20,6 +20,7 @@
     DynamoThunderExecutor,
     IS_WINDOWS,
     requiresCUDA,
+    version_between,
 )
 
 from thunder.tests.make_tensor import make_tensor
@@ -450,6 +451,10 @@ def func(x):
             LooseVersion(torch.__version__) < LooseVersion("2.6.0"),
             reason="Skip until the Torch bug is fixed - https://github.com/pytorch/pytorch/pull/139275",
         ),
+        pytest.mark.skipif(
+            version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+            reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+        ),
     ),
 )
 @requiresCUDA
diff --git a/thunder/tests/test_networks.py b/thunder/tests/test_networks.py
index bec0d7f0d4..287314c18c 100644
--- a/thunder/tests/test_networks.py
+++ b/thunder/tests/test_networks.py
@@ -11,7 +11,13 @@
 import thunder
 import thunder.torch as ttorch
 
-from thunder.tests.framework import instantiate, requiresCUDA, DynamoThunderExecutor, _all_test_executors
+from thunder.tests.framework import (
+    instantiate,
+    requiresCUDA,
+    DynamoThunderExecutor,
+    _all_test_executors,
+    version_between,
+)
 
 import thunder.tests.nanogpt_model as nanogpt_model
 import thunder.tests.hf_bart_self_attn as hf_bart_self_attn
@@ -214,7 +220,16 @@ def test_nanogpt_mlp(executor, device, dtype):
     assert_close(torch_result, thunder_result)
 
 
-@instantiate(dtypes=(thunder.float32,), executors=all_test_executors_and_dynamo)
+@instantiate(
+    dtypes=(thunder.float32,),
+    executors=all_test_executors_and_dynamo,
+    decorators=(
+        pytest.mark.skipif(
+            version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+            reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+        ),
+    ),
+)
 def test_nanogpt_gelu(executor, device, dtype):
     tdtype = ttorch.to_torch_dtype(dtype)
     make = partial(make_tensor, dtype=tdtype, device=device)
@@ -269,6 +284,10 @@ def dummy(*args):
     assert_close(actual, expected)
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @requiresCUDA
 def test_quantization():
     try:
@@ -349,6 +368,10 @@ def test_quantization():
         assert_close(v, sd2[k])
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 @thunder.tests.framework.requiresCUDA
 def test_thunderfx_mistral_nemo_small():
     """
@@ -400,6 +423,10 @@ def test_thunderfx_mistral_nemo_small():
     assert th_backend.subgraph_infos, "Should have at least 1 subgraph"
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 @thunder.tests.framework.requiresCUDA
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2.5-7B-Instruct", "microsoft/Phi-3-mini-128k-instruct"])
 def test_hf_for_nemo(model_id):
diff --git a/thunder/tests/test_recipes.py b/thunder/tests/test_recipes.py
index def8a590a5..eec3df8cb0 100644
--- a/thunder/tests/test_recipes.py
+++ b/thunder/tests/test_recipes.py
@@ -1,8 +1,10 @@
 import thunder
 import transformers
 import torch
+import pytest
 
 from torch.testing import assert_close, make_tensor
+from thunder.tests.framework import version_between
 
 
 def test_recipe_basic_bert():
@@ -22,6 +24,10 @@ def test_recipe_basic_bert():
     assert_close(actual, expected)
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 def test_recipe_basic_bert_dynamo():
     bert = transformers.BertForSequenceClassification(transformers.BertConfig())
     del bert.bert.encoder.layer[1:]
diff --git a/thunder/tests/test_transforms.py b/thunder/tests/test_transforms.py
index 750894ab31..7bdfcef32f 100644
--- a/thunder/tests/test_transforms.py
+++ b/thunder/tests/test_transforms.py
@@ -5,7 +5,7 @@
 
 import thunder
 from thunder.dev_utils.nvtx_profile_transform import NvtxProfileTransform, nvtx_push, nvtx_pop
-from thunder.tests.framework import requiresCUDA
+from thunder.tests.framework import requiresCUDA, version_between
 
 
 @requiresCUDA
@@ -112,6 +112,10 @@ def test_materialization():
     assert_close(actual, expected, rtol=1e-2, atol=1e-2)
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
 @requiresCUDA
 def test_quantization_on_meta():
@@ -185,7 +189,14 @@ def test_quantization_on_meta():
     assert_close(actual, actual2)
 
 
-@pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
+@pytest.mark.skipif(
+    not package_available("bitsandbytes"),
+    reason="`bitsandbytes` is not available",
+)
 @requiresCUDA
 def test_nvfuser_cse():
     with torch.device("cuda"):
@@ -289,6 +300,10 @@ def f(x):
     jf(weights)
 
 
+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
 @requiresCUDA
 def test_materialization_init():