From f1c75741bfcd41723f96730771d43f51b79b3f42 Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Wed, 24 Jan 2024 21:20:42 +0200
Subject: [PATCH] Tests: improve CUDA support detection (#985)

* implicitly skip any test that implicitly uses CUDA on a non-CUDA box

* add a `requires_cuda` fixture
---
 tests/conftest.py                  | 19 +++++++++++++++++++
 tests/test_autograd.py             |  4 ----
 tests/test_cuda_setup_evaluator.py |  4 ++--
 tests/test_functional.py           |  7 ++++---
 tests/test_generation.py           |  2 +-
 tests/test_linear4bit.py           |  1 -
 tests/test_linear8bitlt.py         |  2 --
 tests/test_modules.py              |  1 -
 8 files changed, 26 insertions(+), 14 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 000000000..0b4b91225
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,19 @@
+import pytest
+import torch
+
+
+def pytest_runtest_call(item):
+    try:
+        item.runtest()
+    except AssertionError as ae:
+        if str(ae) == "Torch not compiled with CUDA enabled":
+            pytest.skip("Torch not compiled with CUDA enabled")
+        raise
+
+
+@pytest.fixture(scope="session")
+def requires_cuda() -> bool:
+    cuda_available = torch.cuda.is_available()
+    if not cuda_available:
+        pytest.skip("CUDA is required")
+    return cuda_available
diff --git a/tests/test_autograd.py b/tests/test_autograd.py
index 803fde145..27b010105 100644
--- a/tests/test_autograd.py
+++ b/tests/test_autograd.py
@@ -40,7 +40,6 @@
     ids=names,
 )
 def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     if dim2 > 0:
         dim2 = dim2 - (dim2 % 16)
     dim3 = dim3 - (dim3 % 16)
@@ -307,7 +306,6 @@ def test_matmullt(
     has_fp16_weights,
     has_bias
 ):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
     dimB = (dim3, dim4) if not transpose[1] else (dim4, dim3)
     outlier_dim = torch.randint(0, dimA[1], size=(dimA[1] // 8,), device="cuda")
@@ -461,7 +459,6 @@ def test_matmullt(
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose, has_bias, compress_statistics, quant_type))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type", values, ids=names)
 def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
@@ -551,7 +548,6 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
 def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
diff --git a/tests/test_cuda_setup_evaluator.py b/tests/test_cuda_setup_evaluator.py
index aef9ae6a3..596d0a030 100644
--- a/tests/test_cuda_setup_evaluator.py
+++ b/tests/test_cuda_setup_evaluator.py
@@ -5,12 +5,12 @@
 
 # hardcoded test. Not good, but a sanity check for now
 # TODO: improve this
-def test_manual_override():
+def test_manual_override(requires_cuda):
     manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))
 
     pytorch_version = torch.version.cuda.replace('.', '')
 
-    assert pytorch_version != 122
+    assert pytorch_version != 122  # TODO: this will never be true...
 
     os.environ['CUDA_HOME']='{manual_cuda_path}'
     os.environ['BNB_CUDA_VERSION']='122'
diff --git a/tests/test_functional.py b/tests/test_functional.py
index f314dc6e2..970b4dbdb 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -617,7 +617,10 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
         return
     if dtype == torch.int32 and out_order != "col32":
         return
-    func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    try:
+        func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    except ValueError as ve:
+        pytest.skip(str(ve))  # skip if not supported
 
     if dims == 2:
         A = torch.randint(-128, 127, size=(dim1, dim2), device="cuda").to(dtype)
@@ -2278,7 +2281,6 @@ def test_fp4_quant(dtype):
     assert relerr.item() < 0.28
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 def test_4bit_compressed_stats(quant_type):
     for blocksize in [128, 64]:
@@ -2317,7 +2319,6 @@ def test_4bit_compressed_stats(quant_type):
 
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 #@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 @pytest.mark.parametrize("quant_type", ['nf4'])
 def test_bench_4bit_dequant(quant_type):
diff --git a/tests/test_generation.py b/tests/test_generation.py
index b4c1a8c6e..ecafdddf8 100644
--- a/tests/test_generation.py
+++ b/tests/test_generation.py
@@ -79,7 +79,7 @@ def model_and_tokenizer(request):
 @pytest.mark.parametrize("DQ", [True, False], ids=['DQ_True', 'DQ_False'])
 @pytest.mark.parametrize("inference_kernel", [True, False], ids=['inference_kernel_True', 'inference_kernel_False'])
 #@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=['fp16', 'bf16', 'fp32'])
-def test_pi(model_and_tokenizer, inference_kernel, DQ):
+def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ):
     print('')
     dtype = torch.float16
 
diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py
index f6be79a84..478255eee 100644
--- a/tests/test_linear4bit.py
+++ b/tests/test_linear4bit.py
@@ -15,7 +15,6 @@
     'float32': torch.float32
 }
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize(
     "quant_type, compress_statistics, bias, quant_storage",
     list(product(["nf4", "fp4"], [False, True], [False, True], ['uint8', 'float16', 'bfloat16', 'float32'])),
diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 37f7af9cb..8904aaf1b 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -33,7 +33,6 @@ def test_layout_exact_match():
         assert torch.all(torch.eq(restored_x, x))
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 def test_linear_no_igemmlt():
     linear = torch.nn.Linear(1024, 3072)
     x = torch.randn(3, 1024, dtype=torch.half)
@@ -68,7 +67,6 @@ def test_linear_no_igemmlt():
     assert linear_custom.state.CxB is None
 
 
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
                          list(product([False, True], [False, True], [False, True], [False, True])))
 def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
diff --git a/tests/test_modules.py b/tests/test_modules.py
index cb4368a09..cabd7cf54 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -520,7 +520,6 @@ def test_linear_kbit_fp32_bias(module):
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
 names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
     b = 17