From 98616f4fd1ecd971580412487451c7244cc77a21 Mon Sep 17 00:00:00 2001 From: Zhitao Yu Date: Mon, 24 Nov 2025 15:18:19 -0800 Subject: [PATCH 1/3] Update CVCUDA tests for horizontal and vertical flip and make changes according to the comments --- test/test_transforms_v2.py | 103 +++++++++++++++--- torchvision/transforms/v2/_geometry.py | 12 +- .../transforms/v2/functional/_geometry.py | 24 +++- 3 files changed, 119 insertions(+), 20 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 670a9d00ffb..d7684d7c9a2 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -1240,6 +1240,10 @@ def test_kernel_video(self): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1255,6 +1259,11 @@ def test_functional(self, make_input): (F.horizontal_flip_image, torch.Tensor), (F._geometry._horizontal_flip_image_pil, PIL.Image.Image), (F.horizontal_flip_image, tv_tensors.Image), + pytest.param( + F._geometry._horizontal_flip_image_cvcuda, + cvcuda.Tensor, + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), (F.horizontal_flip_bounding_boxes, tv_tensors.BoundingBoxes), (F.horizontal_flip_mask, tv_tensors.Mask), (F.horizontal_flip_video, tv_tensors.Video), @@ -1270,6 +1279,10 @@ def test_functional_signature(self, kernel, input_type): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1283,13 +1296,32 @@ def test_transform(self, make_input, device): @pytest.mark.parametrize( "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) - def test_image_correctness(self, fn): - image = make_image(dtype=torch.uint8, device="cpu") - actual = fn(image) - expected = F.to_image(F.horizontal_flip(F.to_pil_image(image))) + @pytest.mark.parametrize( + "make_input", + [ + make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), + ], + ) - torch.testing.assert_close(actual, expected) + def test_image_correctness(self, fn, make_input): + image = make_input() + actual = fn(image) + if isinstance(image, cvcuda.Tensor): + # For CVCUDA input + expected = F.horizontal_flip(F.cvcuda_to_tensor(image)) + print("actual is ", F.cvcuda_to_tensor(actual)) + print("expected is ", expected) + assert_equal(F.cvcuda_to_tensor(actual), expected) + + else: + # For PIL/regular image input + expected = F.to_image(F.horizontal_flip(F.to_pil_image(image))) + assert_equal(actual, expected) def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes): affine_matrix = np.array( @@ -1345,6 +1377,10 @@ def test_keypoints_correctness(self, fn): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1354,12 +1390,13 @@ def test_keypoints_correctness(self, fn): @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): input = make_input(device=device) - transform = transforms.RandomHorizontalFlip(p=0) - output = transform(input) + if isinstance(input, cvcuda.Tensor): + assert_equal(F.cvcuda_to_tensor(output), F.cvcuda_to_tensor(input)) + else: + assert_equal(output, input) - assert_equal(output, input) class TestAffine: @@ -1856,6 +1893,10 @@ def test_kernel_video(self): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1871,6 +1912,11 @@ def test_functional(self, make_input): (F.vertical_flip_image, torch.Tensor), (F._geometry._vertical_flip_image_pil, PIL.Image.Image), (F.vertical_flip_image, tv_tensors.Image), + pytest.param( + F._geometry._vertical_flip_image_cvcuda, + cvcuda.Tensor, + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), (F.vertical_flip_bounding_boxes, tv_tensors.BoundingBoxes), (F.vertical_flip_mask, tv_tensors.Mask), (F.vertical_flip_video, tv_tensors.Video), @@ -1886,6 +1932,10 @@ def test_functional_signature(self, kernel, input_type): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1897,13 +1947,28 @@ def test_transform(self, make_input, device): check_transform(transforms.RandomVerticalFlip(p=1), make_input(device=device)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) - def test_image_correctness(self, fn): - image = make_image(dtype=torch.uint8, device="cpu") + @pytest.mark.parametrize( + "make_input", + [ + make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), + ], + ) + def test_image_correctness(self, fn, make_input): + image = make_input() actual = fn(image) - expected = F.to_image(F.vertical_flip(F.to_pil_image(image))) - - torch.testing.assert_close(actual, expected) + if isinstance(image, cvcuda.Tensor): + # For CVCUDA input + expected = F.vertical_flip(F.cvcuda_to_tensor(image)) + assert_equal(F.cvcuda_to_tensor(actual), expected) + else: + # For PIL/regular image input + expected = F.to_image(F.vertical_flip(F.to_pil_image(image))) + assert_equal(actual, expected) def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes): affine_matrix = np.array( @@ -1955,6 +2020,10 @@ def test_keypoints_correctness(self, fn): make_image_tensor, make_image_pil, make_image, + pytest.param( + functools.partial(make_image_cvcuda, batch_dims=(1,)), + marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), + ), make_bounding_boxes, make_segmentation_mask, make_video, @@ -1964,12 +2033,12 @@ def test_keypoints_correctness(self, fn): @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): input = make_input(device=device) - transform = transforms.RandomVerticalFlip(p=0) - output = transform(input) - - assert_equal(output, input) + if isinstance(input, cvcuda.Tensor): + assert_equal(F.cvcuda_to_tensor(output), F.cvcuda_to_tensor(input)) + else: + assert_equal(output, input) class TestRotate: diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index 1418a6b4953..bef6894de1b 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -11,7 +11,7 @@ from torchvision.ops.boxes import box_iou from torchvision.transforms.functional import _get_perspective_coeffs from torchvision.transforms.v2 import functional as F, InterpolationMode, Transform -from torchvision.transforms.v2.functional._utils import _FillType +from torchvision.transforms.v2.functional._utils import _FillType, _import_cvcuda, _is_cvcuda_available from ._transform import _RandomApplyTransform from ._utils import ( @@ -30,6 +30,9 @@ query_size, ) +CVCUDA_AVAILABLE = _is_cvcuda_available() +if CVCUDA_AVAILABLE: + cvcuda = _import_cvcuda() class RandomHorizontalFlip(_RandomApplyTransform): """Horizontally flip the input with a given probability. @@ -45,6 +48,9 @@ class RandomHorizontalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomHorizontalFlip + if CVCUDA_AVAILABLE: + _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.horizontal_flip, inpt) @@ -63,6 +69,10 @@ class RandomVerticalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomVerticalFlip + if CVCUDA_AVAILABLE: + _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) + + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.vertical_flip, inpt) diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 4fcb7fabe0d..82ac3a95bb5 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -2,7 +2,7 @@ import numbers import warnings from collections.abc import Sequence -from typing import Any, Optional, Union +from typing import Any, Optional, TYPE_CHECKING, Union import PIL.Image import torch @@ -26,7 +26,13 @@ from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_bounding_box_format -from ._utils import _FillTypeJIT, _get_kernel, _register_five_ten_crop_kernel_internal, _register_kernel_internal +from ._utils import _FillTypeJIT, _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_five_ten_crop_kernel_internal, _register_kernel_internal + +CVCUDA_AVAILABLE = _is_cvcuda_available() +if TYPE_CHECKING: + import cvcuda +if CVCUDA_AVAILABLE: + cvcuda = _import_cvcuda() def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> InterpolationMode: @@ -61,6 +67,12 @@ def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor: def _horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: return _FP.hflip(image) +def _horizontal_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor": + return _import_cvcuda().flip(image, flipCode=1) + + +if CVCUDA_AVAILABLE: + _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, _import_cvcuda().Tensor)(_horizontal_flip_image_cvcuda) @_register_kernel_internal(horizontal_flip, tv_tensors.Mask) def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -150,6 +162,14 @@ def _vertical_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: return _FP.vflip(image) +def _vertical_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor": + return _import_cvcuda().flip(image, flipCode=0) + + +if CVCUDA_AVAILABLE: + _vertical_flip_image_cvcuda_registered = _register_kernel_internal(vertical_flip, _import_cvcuda().Tensor)(_vertical_flip_image_cvcuda) + + @_register_kernel_internal(vertical_flip, tv_tensors.Mask) def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor: return vertical_flip_image(mask) From 42fcc4114a1c1838591ddb3e1a3121084775d450 Mon Sep 17 00:00:00 2001 From: Zhitao Yu Date: Wed, 26 Nov 2025 02:17:14 -0800 Subject: [PATCH 2/3] WIP: cvcuda flip transforms - pending tech lead review --- test/test_transforms_v2.py | 6 ------ torchvision/transforms/v2/_geometry.py | 8 +------- torchvision/transforms/v2/_transform.py | 6 +++++- .../transforms/v2/functional/_geometry.py | 19 ++++++++++++++++--- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index d7684d7c9a2..f15faafebf8 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -1312,14 +1312,10 @@ def test_image_correctness(self, fn, make_input): image = make_input() actual = fn(image) if isinstance(image, cvcuda.Tensor): - # For CVCUDA input expected = F.horizontal_flip(F.cvcuda_to_tensor(image)) - print("actual is ", F.cvcuda_to_tensor(actual)) - print("expected is ", expected) assert_equal(F.cvcuda_to_tensor(actual), expected) else: - # For PIL/regular image input expected = F.to_image(F.horizontal_flip(F.to_pil_image(image))) assert_equal(actual, expected) @@ -1962,11 +1958,9 @@ def test_image_correctness(self, fn, make_input): image = make_input() actual = fn(image) if isinstance(image, cvcuda.Tensor): - # For CVCUDA input expected = F.vertical_flip(F.cvcuda_to_tensor(image)) assert_equal(F.cvcuda_to_tensor(actual), expected) else: - # For PIL/regular image input expected = F.to_image(F.vertical_flip(F.to_pil_image(image))) assert_equal(actual, expected) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index bef6894de1b..cbf3fae6982 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -34,6 +34,7 @@ if CVCUDA_AVAILABLE: cvcuda = _import_cvcuda() + class RandomHorizontalFlip(_RandomApplyTransform): """Horizontally flip the input with a given probability. @@ -48,9 +49,6 @@ class RandomHorizontalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomHorizontalFlip - if CVCUDA_AVAILABLE: - _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) - def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.horizontal_flip, inpt) @@ -69,10 +67,6 @@ class RandomVerticalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomVerticalFlip - if CVCUDA_AVAILABLE: - _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) - - def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.vertical_flip, inpt) diff --git a/torchvision/transforms/v2/_transform.py b/torchvision/transforms/v2/_transform.py index ac84fcb6c82..28297e9e4f2 100644 --- a/torchvision/transforms/v2/_transform.py +++ b/torchvision/transforms/v2/_transform.py @@ -12,7 +12,8 @@ from torchvision.utils import _log_api_usage_once from .functional._utils import _get_kernel - +from torchvision.transforms.v2.functional._utils import _import_cvcuda, _is_cvcuda_available +CVCUDA_AVAILABLE = _is_cvcuda_available() class Transform(nn.Module): """Base class to implement your own v2 transforms. @@ -24,6 +25,9 @@ class Transform(nn.Module): # Class attribute defining transformed types. Other types are passed-through without any transformation # We support both Types and callables that are able to do further checks on the type of the input. _transformed_types: tuple[type | Callable[[Any], bool], ...] = (torch.Tensor, PIL.Image.Image) + if CVCUDA_AVAILABLE: + _transformed_types += (_import_cvcuda().Tensor,) + def __init__(self) -> None: super().__init__() diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 82ac3a95bb5..d0e76cdc358 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -26,7 +26,14 @@ from ._meta import _get_size_image_pil, clamp_bounding_boxes, convert_bounding_box_format -from ._utils import _FillTypeJIT, _get_kernel, _import_cvcuda, _is_cvcuda_available, _register_five_ten_crop_kernel_internal, _register_kernel_internal +from ._utils import ( + _FillTypeJIT, + _get_kernel, + _import_cvcuda, + _is_cvcuda_available, + _register_five_ten_crop_kernel_internal, + _register_kernel_internal, +) CVCUDA_AVAILABLE = _is_cvcuda_available() if TYPE_CHECKING: @@ -67,12 +74,16 @@ def horizontal_flip_image(image: torch.Tensor) -> torch.Tensor: def _horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: return _FP.hflip(image) + def _horizontal_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor": return _import_cvcuda().flip(image, flipCode=1) if CVCUDA_AVAILABLE: - _horizontal_flip_image_cvcuda_registered = _register_kernel_internal(horizontal_flip, _import_cvcuda().Tensor)(_horizontal_flip_image_cvcuda) + _register_kernel_internal(horizontal_flip, _import_cvcuda().Tensor)( + _horizontal_flip_image_cvcuda + ) + @_register_kernel_internal(horizontal_flip, tv_tensors.Mask) def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -167,7 +178,9 @@ def _vertical_flip_image_cvcuda(image: "cvcuda.Tensor") -> "cvcuda.Tensor": if CVCUDA_AVAILABLE: - _vertical_flip_image_cvcuda_registered = _register_kernel_internal(vertical_flip, _import_cvcuda().Tensor)(_vertical_flip_image_cvcuda) + _register_kernel_internal(vertical_flip, _import_cvcuda().Tensor)( + _vertical_flip_image_cvcuda + ) @_register_kernel_internal(vertical_flip, tv_tensors.Mask) From 9423b4d97cb4b28407bd70272d41cb87f350cf88 Mon Sep 17 00:00:00 2001 From: Zhitao Yu Date: Thu, 27 Nov 2025 03:31:47 -0800 Subject: [PATCH 3/3] Address review comments from Nov 26th --- test/common_utils.py | 42 ++++++++++++++++++---- test/test_transforms_v2.py | 47 +++++++++++-------------- torchvision/transforms/v2/_geometry.py | 6 ++++ torchvision/transforms/v2/_transform.py | 6 ++-- 4 files changed, 64 insertions(+), 37 deletions(-) diff --git a/test/common_utils.py b/test/common_utils.py index 8c3c9dd58a8..6bd585d394d 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -188,7 +188,12 @@ def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None): def _assert_approx_equal_tensor_to_pil( - tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None + tensor, + pil_image, + tol=1e-5, + msg=None, + agg_method="mean", + allowed_percentage_diff=None, ): # FIXME: this is handled automatically by `assert_close` below. Let's remove this in favor of it # TODO: we could just merge this into _assert_equal_tensor_to_pil @@ -284,8 +289,29 @@ def __init__( mae=False, **other_parameters, ): - if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]): - actual, expected = (to_image(input) for input in [actual, expected]) + # Convert PIL images to tv_tensors.Image (regardless of what the other is) + if isinstance(actual, PIL.Image.Image): + actual = to_image(actual) + if isinstance(expected, PIL.Image.Image): + expected = to_image(expected) + + # Convert CV-CUDA tensors to torch.Tensor (regardless of what the other is) + try: + import cvcuda + from torchvision.transforms.v2.functional import cvcuda_to_tensor + + if isinstance(actual, cvcuda.Tensor): + actual = cvcuda_to_tensor(actual) + # Remove batch dimension if it's 1 for easier comparison + if actual.shape[0] == 1: + actual = actual[0] + if isinstance(expected, cvcuda.Tensor): + expected = cvcuda_to_tensor(expected) + # Remove batch dimension if it's 1 for easier comparison + if expected.shape[0] == 1: + expected = expected[0] + except ImportError: + pass super().__init__(actual, expected, **other_parameters) self.mae = mae @@ -400,8 +426,8 @@ def make_image_pil(*args, **kwargs): return to_pil_image(make_image(*args, **kwargs)) -def make_image_cvcuda(*args, **kwargs): - return to_cvcuda_tensor(make_image(*args, **kwargs)) +def make_image_cvcuda(*args, batch_dims=(1,), **kwargs): + return to_cvcuda_tensor(make_image(*args, batch_dims=batch_dims, **kwargs)) def make_keypoints(canvas_size=DEFAULT_SIZE, *, num_points=4, dtype=None, device="cpu"): @@ -541,5 +567,9 @@ def ignore_jit_no_profile_information_warning(): # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore # them. with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning) + warnings.filterwarnings( + "ignore", + message=re.escape("operator() profile_node %"), + category=UserWarning, + ) yield diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index f15faafebf8..1a21c08013a 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -1241,7 +1241,7 @@ def test_kernel_video(self): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -1280,7 +1280,7 @@ def test_functional_signature(self, kernel, input_type): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -1296,28 +1296,24 @@ def test_transform(self, make_input, device): @pytest.mark.parametrize( "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) - @pytest.mark.parametrize( "make_input", [ make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), ], ) - def test_image_correctness(self, fn, make_input): image = make_input() actual = fn(image) - if isinstance(image, cvcuda.Tensor): - expected = F.horizontal_flip(F.cvcuda_to_tensor(image)) - assert_equal(F.cvcuda_to_tensor(actual), expected) - - else: - expected = F.to_image(F.horizontal_flip(F.to_pil_image(image))) - assert_equal(actual, expected) + if make_input is make_image_cvcuda: + image = F.cvcuda_to_tensor(image)[0] # Remove batch dimension: [1, C, H, W] -> [C, H, W] + expected = F.horizontal_flip(F.to_pil_image(image)) + # CV-CUDA tensors are on CUDA, PIL images are on CPU, so disable device checking + assert_equal(actual, expected, check_device=False) def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes): affine_matrix = np.array( @@ -1374,7 +1370,7 @@ def test_keypoints_correctness(self, fn): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -1394,7 +1390,6 @@ def test_transform_noop(self, make_input, device): assert_equal(output, input) - class TestAffine: _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( # float, int @@ -1890,7 +1885,7 @@ def test_kernel_video(self): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -1929,7 +1924,7 @@ def test_functional_signature(self, kernel, input_type): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -1948,21 +1943,19 @@ def test_transform(self, make_input, device): [ make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), ], ) - def test_image_correctness(self, fn, make_input): image = make_input() actual = fn(image) - if isinstance(image, cvcuda.Tensor): - expected = F.vertical_flip(F.cvcuda_to_tensor(image)) - assert_equal(F.cvcuda_to_tensor(actual), expected) - else: - expected = F.to_image(F.vertical_flip(F.to_pil_image(image))) - assert_equal(actual, expected) + if make_input is make_image_cvcuda: + image = F.cvcuda_to_tensor(image)[0] # Remove batch dimension: [1, C, H, W] -> [C, H, W] + expected = F.vertical_flip(F.to_pil_image(image)) + # CV-CUDA tensors are on CUDA, PIL images are on CPU, so disable device checking + assert_equal(actual, expected, check_device=False) def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes): affine_matrix = np.array( @@ -2015,7 +2008,7 @@ def test_keypoints_correctness(self, fn): make_image_pil, make_image, pytest.param( - functools.partial(make_image_cvcuda, batch_dims=(1,)), + make_image_cvcuda, marks=pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="CVCUDA is not available"), ), make_bounding_boxes, @@ -7164,7 +7157,7 @@ def test_classification_preset(image_type, label_type, dataset_return_type, to_t out = t(sample) - assert type(out) == type(sample) + assert type(out) is type(sample) if dataset_return_type is tuple: out_image, out_label = out @@ -7475,7 +7468,7 @@ def test_functional(self, input_type): boxes, valid = F.sanitize_bounding_boxes(boxes, format=format, canvas_size=canvas_size, min_size=min_size) assert_equal(valid, torch.tensor(expected_valid_mask)) - assert type(valid) == torch.Tensor + assert type(valid) is torch.Tensor assert boxes.shape[0] == sum(valid) assert isinstance(boxes, input_type) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index cbf3fae6982..7bb17aa7f41 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -49,6 +49,9 @@ class RandomHorizontalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomHorizontalFlip + if CVCUDA_AVAILABLE: + _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.horizontal_flip, inpt) @@ -67,6 +70,9 @@ class RandomVerticalFlip(_RandomApplyTransform): _v1_transform_cls = _transforms.RandomVerticalFlip + if CVCUDA_AVAILABLE: + _transformed_types = (torch.Tensor, PIL.Image.Image, cvcuda.Tensor) + def transform(self, inpt: Any, params: dict[str, Any]) -> Any: return self._call_kernel(F.vertical_flip, inpt) diff --git a/torchvision/transforms/v2/_transform.py b/torchvision/transforms/v2/_transform.py index 28297e9e4f2..610e7d7e83b 100644 --- a/torchvision/transforms/v2/_transform.py +++ b/torchvision/transforms/v2/_transform.py @@ -12,9 +12,10 @@ from torchvision.utils import _log_api_usage_once from .functional._utils import _get_kernel -from torchvision.transforms.v2.functional._utils import _import_cvcuda, _is_cvcuda_available + CVCUDA_AVAILABLE = _is_cvcuda_available() + class Transform(nn.Module): """Base class to implement your own v2 transforms. @@ -25,9 +26,6 @@ class Transform(nn.Module): # Class attribute defining transformed types. Other types are passed-through without any transformation # We support both Types and callables that are able to do further checks on the type of the input. _transformed_types: tuple[type | Callable[[Any], bool], ...] = (torch.Tensor, PIL.Image.Image) - if CVCUDA_AVAILABLE: - _transformed_types += (_import_cvcuda().Tensor,) - def __init__(self) -> None: super().__init__()