Skip to content

Commit

Permalink
explicitly turn tf32 off
Browse files Browse the repository at this point in the history
Signed-off-by: Masaki Kozuki <mkozuki@nvidia.com>
  • Loading branch information
crcrpar committed Jun 24, 2024
1 parent 4ce822b commit 9524998
Showing 1 changed file with 23 additions and 13 deletions.
36 changes: 23 additions & 13 deletions thunder/tests/test_inplace_functionalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections.abc import Callable
from dataclasses import dataclass
from functools import partial
from typing import TYPE_CHECKING

import pytest
import torch.testing
Expand All @@ -13,6 +14,9 @@
from thunder.tests.make_tensor import make_tensor
from thunder.torch import _torch_to_thunder_function_map, _inplace_to_out_of_place

if TYPE_CHECKING:
from thunder.core.symbol import Symbol


# `SampleInput`s of ops with `inplace` argument do not seem to come with `inplace` arg, so give it to them.
def sample_generator_wrapper(sample_generator):
Expand All @@ -36,7 +40,7 @@ def inplace_masked_fill_sample_generator(op, device, dtype, requires_grad, **kwa
yield SampleInput(a, pred, value)


_torchsymbol_to_torch: dict[Sybmol, Callable] = {v: k for k, v in _torch_to_thunder_function_map.items()}
_torchsymbol_to_torch: dict[Symbol, Callable] = {v: k for k, v in _torch_to_thunder_function_map.items()}
_functional_to_inplace: dict[Callable, Callable] = {
functional: inplace for inplace, (functional, index) in _inplace_to_out_of_place.items() if index == -1
}
Expand Down Expand Up @@ -125,33 +129,39 @@ def test_functionalization(op: OpInfo, device: str, dtype: dtypes.dtype, executo
)


# TODO(crcrpar): Investigate the numerical accuracy when `train=True` and dtype is fp32.
# with RTX6000 Ada and CUDA 12.3, I see somewhat huge error:
# E AssertionError: Tensor-likes are not close!
# E
# E Mismatched elements: 913 / 1000 (91.3%)
# E Greatest absolute difference: 0.000273287296295166 at index (0, 50) (up to 1e-05 allowed)
# E Greatest relative difference: 0.4177769422531128 at index (0, 727) (up to 1.3e-06 allowed)
@pytest.fixture
def turn_off_tf32_and_set_seed(monkeypatch):
    """Disable TF32 math and seed PyTorch's RNG for the requesting test.

    TF32 is switched off in two ways:

    * ``NVIDIA_TF32_OVERRIDE=0`` is exported for the duration of the test.
    * PyTorch's own TF32 switches for cuBLAS matmuls and cuDNN convolutions
      are set to ``False``.

    All three changes go through ``monkeypatch`` and are therefore undone
    when the test finishes. The global RNG seed set by
    ``torch.manual_seed(42)`` is *not* restored afterwards.

    Args:
        monkeypatch: pytest's built-in ``monkeypatch`` fixture.
    """
    import torch

    # NVIDIA documents that this variable must be set before the application
    # is launched; changing it inside an already-running pytest process is
    # therefore not guaranteed to take effect. It is kept for any component
    # that does read it late, but it must not be the only safeguard.
    monkeypatch.setenv("NVIDIA_TF32_OVERRIDE", "0")

    # PyTorch's runtime switches apply immediately, regardless of when the
    # CUDA libraries were initialized.
    monkeypatch.setattr(torch.backends.cuda.matmul, "allow_tf32", False)
    monkeypatch.setattr(torch.backends.cudnn, "allow_tf32", False)

    torch.manual_seed(42)


@requiresCUDA
@pytest.mark.parametrize("train", (False, True))
def test_parse_resnet18(train: bool):
def test_parse_resnet18(turn_off_tf32_and_set_seed, train: bool):
from contextlib import nullcontext
import thunder

torchvision = pytest.importorskip("torchvision")

device = torch.device("cuda")
dtype = torch.float64 if train else torch.float32
dtype = torch.float32
with device:
model: nn.Module = torchvision.models.resnet18(weights=None).to(device=device, dtype=dtype)
ref_model: nn.Module = torchvision.models.resnet18(weights=None).to(device=device, dtype=dtype)
model = torchvision.models.resnet18(weights=None).to(device=device, dtype=dtype)
ref_model = torchvision.models.resnet18(weights=None).to(device=device, dtype=dtype)
if not train:
model = model.eval()
ref_model = ref_model.eval()
ctx = torch.no_grad
else:
ctx = nullcontext
ref_model.load_state_dict(model.state_dict())

jitted = thunder.jit(model)
x = make_tensor((1, 3, 224, 224), dtype=dtype, device=device)
torch.testing.assert_close(jitted(x), ref_model(x))
with ctx():
torch.testing.assert_close(jitted(x), ref_model(x))


@instantiate(
Expand Down

0 comments on commit 9524998

Please sign in to comment.