try removing ltorch.copy_ #1209

Closed
3 changes: 1 addition & 2 deletions thunder/executors/nvfuserex_impl.py
@@ -2071,8 +2071,7 @@ def copy_(
 ) -> Any:
     nvcopy_from = getnv(copy_from, fd, lc_to_nv_map)
     nvcopy_to = getnv(copy_to, fd, lc_to_nv_map)
-    alias_output = fd.ops.set(nvcopy_from)
-    fd.add_output(alias_output, alias_input=nvcopy_to)
+    fd.add_output(nvcopy_from, alias_input=nvcopy_to)
Collaborator

Are we supposed to do this? I'm worried that this will give us a wrong program.

Collaborator Author

This PR is composed of the revert of #1209 and the test update.
I thought fd.ops.set was something we ideally want to avoid, as per #1173.

Collaborator

Sorry about not being clearer in #1173.

We still wanted to have the set for copy_. I think the root cause of issue #1173 is that we are returning nvcopy_from in the fusion region.
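
A minimal sketch of the translation this comment describes (editor's illustration; the signature is simplified, and getnv, fd.ops.set, and add_output's alias_input are taken from the hunk above): keep the extra set, and return the destination rather than nvcopy_from.

def copy_(copy_from, copy_to, *, fd, lc_to_nv_map):
    # Look up the nvFuser TensorViews backing the thunder proxies.
    nvcopy_from = getnv(copy_from, fd, lc_to_nv_map)
    nvcopy_to = getnv(copy_to, fd, lc_to_nv_map)
    # The extra copy: materialize the source into a fresh TensorView.
    alias_output = fd.ops.set(nvcopy_from)
    # Declare that this fresh TensorView overwrites the destination buffer.
    fd.add_output(alias_output, alias_input=nvcopy_to)
    # Hand back the destination, never nvcopy_from, so the fusion does not
    # expose the aliased source as one of its outputs.
    return nvcopy_to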

Collaborator Author

why isn't there a copy op?

Collaborator

There is a copy op; the difference is whether we are returning a different buffer or the aliased source.

I think the example below would help.

The thunder program below behaves differently depending on what the return value is.

import thunder
import torch
from thunder.core import prims

def foo(x):
    x_relu = prims.abs(x)
    old_x = prims.copy_(x_relu, x)
    return old_x  # this returns x, the aliased source from the program.
    #return x_relu  # this returns the x_relu, which should be in a different buffer.

jfoo = thunder.jit(foo)

x = torch.randn(2, 4, device="cuda")
o = jfoo(x)

print(thunder.last_traces(jfoo)[-1])
print(x)
print(o)
print(x.data_ptr())
print(o.data_ptr())

They are translated into the corresponding nvFuser programs.

import torch
from nvfuser import FusionDefinition, DataType

def nvfuser_fusion_id4(fd : FusionDefinition) -> None :
    T0 = fd.define_tensor(shape=[2, 4], contiguity=[True, True], dtype=DataType.Float, is_cpu=False, stride_order=[1, 0])
    T1 = fd.ops.neg(T0)
    T2 = fd.ops.set(T1)
    fd.add_output(T2, T0)
    fd.add_output(T2)
    # fd.add_output(T1)  # doing this would cause us to return an extra buffer.

The copy_ part is translated to:

T2 = fd.ops.set(T1)  # this is the extra copy we added.
fd.add_output(T2, T0)  # the extra copy is used to overwrite T0.

When we return T2 as the output of the fusion, since T2 is marked as an alias of T0 (sharing storage), we are not returning an extra buffer here.

However, the story changes when we return T1 from the fusion, since T1 is a new buffer in the program. You can play with this using the snippet below:

with FusionDefinition() as fd:
    nvfuser_fusion_id4(fd)

inputs = [
    torch.testing.make_tensor((2, 4), dtype=torch.float32, device='cuda:0'),
]

print(inputs)
o = fd.execute(inputs)
print(inputs)

print(o[0].data_ptr())
print(inputs[0].data_ptr())

You'll notice that without the extra fd.ops.set in copy_, nvfuser won't be able to faithfully represent the intended thunder behavior.
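
For contrast, here is a hypothetical variant of the same fusion with the set dropped (the name nvfuser_fusion_no_set is invented for illustration; it mirrors what the nvfuserex_impl.py change in this PR would emit):

def nvfuser_fusion_no_set(fd: FusionDefinition) -> None:
    T0 = fd.define_tensor(shape=[2, 4], contiguity=[True, True], dtype=DataType.Float, is_cpu=False, stride_order=[1, 0])
    T1 = fd.ops.neg(T0)
    # Without the intermediate fd.ops.set, the op result itself is declared
    # to overwrite the input buffer.
    fd.add_output(T1, T0)
    # Returning T1 now hands back storage tied to the input rather than a fresh buffer.
    fd.add_output(T1)

Swapping this definition into the execute snippet above should show the returned tensor's data_ptr matching the input's, instead of pointing at a separate buffer.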

Collaborator Author

okay, then I don't think this PR deserves a merge.

f0eac73: compile 54s, 11.07ms
82dc7a7: compile 97s, 14.28ms

     return nvcopy_to


32 changes: 21 additions & 11 deletions thunder/tests/test_inplace_copy.py
@@ -5,6 +5,7 @@
 from torch.testing import assert_close, make_tensor
 
 import thunder
+from thunder.core import prims
 import thunder.core.dtypes as datatypes
 import thunder.torch as ttorch
 from thunder.tests.framework import instantiate, nvFuserExecutor
@@ -158,9 +159,9 @@ def func3(x, y):
 def test_inplace_copy_dst_copy_returned_issue_1109(executor, device, dtype):
     def func(T0):
         T1 = torch.sin(T0)
-        T0.copy_(T1) # destination.copy_(source)
+        prims.copy_(T1, T0)
         T2 = torch.cos(T1)
-        T0.copy_(T2)
+        prims.copy_(T2, T0)
         # T1 & T2 should be returned as separate buffer, instead of sharing
         # storage with T0
         return T1, T2
@@ -169,12 +170,21 @@ def func(T0):
     # This pattern is unsafe in general. Disabling sanity check to silence
     # exception for testing
     traced_foo = executor.make_callable(func, disable_inplace_copy_check=True)
-    a = make_tensor((4, 4), device=device, dtype=tdtype)
-    a_ref = a.clone()
-
-    o_thunder = traced_foo(a)
-    o_eager = func(a_ref)
-
-    assert_close(a_ref, a)
-    for o, o_ref in zip(o_thunder, o_eager):
-        assert_close(o, o_ref)
+    t0 = make_tensor((4, 4), device=device, dtype=tdtype)
+    t0_ref = t0.clone()
+
+    actual_t1, actual_t2 = traced_foo(t0)
+
+    expected = t0_ref.sin().cos()
+    expected_t1 = t0_ref.sin()
+    expected_t2 = expected_t1.cos()
+    expected = expected_t2
+
+    assert_close(t0, expected)
+    assert_close(actual_t2, expected_t2)
+    # FIXME(crcrpar): Since there's no `ltorch.Tensor.copy_`, functions like `func` would not
+    # be observed and executed with pytorch eager mode. Though there should be either an audit of
+    # `prims.copy_` in a nvfuser region and/or what #1110 did.
+    assert actual_t1.data_ptr() == actual_t2.data_ptr()
Collaborator

Thanks for putting a FIXME there. But is this the case where nvfuser is producing a wrong result for func?

One of the outputs will still be aliasing the input, and that's not how the program should be translated, i.e. I don't think an audit that removes redundant prims.copy_ would be able to solve that.

Collaborator

For the record, I'm all in for having a pass that removes redundant in-place copies, since that's a net improvement. But I'm not sure it would resolve the performance regression that we are seeing right now.

Collaborator Author

If a callable has multiple in-place ops, then the redundant copies would be cleaned up. We just don't have an equivalent for multiple raw prims.copy_ calls.
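
A hypothetical illustration of that distinction (not code from this PR): the in-place ops here go through the torch-level API, so the redundant intermediate copies can be cleaned up, whereas func above calls prims.copy_ directly and gets no such treatment.

import torch
import thunder

def multiple_inplace(x):
    # Two consecutive torch-level in-place ops; per the comment above, the
    # redundant copy produced for the first one can be cleaned up, unlike
    # back-to-back raw prims.copy_ calls.
    x.add_(1.0)
    x.mul_(2.0)
    return x

jfn = thunder.jit(multiple_inplace)
print(jfn(torch.randn(2, 4)))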

+    with pytest.raises(AssertionError):
+        assert_close(actual_t1, expected_t1)
5 changes: 0 additions & 5 deletions thunder/torch/__init__.py
@@ -1945,11 +1945,6 @@ def copysign_(a, b, /):
     return prims.copy_(copysign(a, b), a)
 
 
-@torchsymbol(torch.Tensor.copy_, is_method=True) # , tags=(prims.OpTags.IN_PLACE,))
-def copy_(a, b, /):
-    return prims.copy_(b, a)
-
-
 # TODO Implement div
 @torchsymbol(torch.div, is_method=True)
 def div(