+from contextlib import contextmanager
 import torch
 import torch.nn as nn
 from torch import Tensor
@@ -52,6 +53,19 @@ def _dict_unflatten(values: List[Any], context: Context) -> Dict[Any, Any]:
 aten = torch.ops.aten


+@contextmanager
+def preserve_rng_state():
+    rng_state = torch.clone(torch.random.get_rng_state())
+    if torch.cuda.is_available():
+        cuda_rng_state = torch.clone(torch.cuda.get_rng_state())
+    try:
+        yield
+    finally:
+        torch.random.set_rng_state(rng_state)
+        if torch.cuda.is_available():
+            torch.cuda.set_rng_state(cuda_rng_state)
+
+
 def create_joint_forward_backward(fn):
     def joint_forward_backward(
         primals: List[Any], tangents: List[Any]
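The new `preserve_rng_state` context manager snapshots the CPU RNG state (and the CUDA state when a GPU is available) on entry and restores it in the `finally` block, so any random numbers consumed inside the block leave the global RNG stream untouched. A minimal standalone sketch of the behavior (illustration only, not part of this diff):

```python
import torch
from contextlib import contextmanager


@contextmanager
def preserve_rng_state():
    # Snapshot the current CPU RNG state (and CUDA state if present).
    rng_state = torch.clone(torch.random.get_rng_state())
    if torch.cuda.is_available():
        cuda_rng_state = torch.clone(torch.cuda.get_rng_state())
    try:
        yield
    finally:
        # Restore the snapshots, undoing any RNG consumption inside the block.
        torch.random.set_rng_state(rng_state)
        if torch.cuda.is_available():
            torch.cuda.set_rng_state(cuda_rng_state)


torch.manual_seed(0)
with preserve_rng_state():
    _ = torch.randn(3)       # consumes random numbers, e.g. during a tracing run
after = torch.randn(3)       # drawn from the restored stream

torch.manual_seed(0)
assert torch.equal(after, torch.randn(3))  # same values as if nothing was drawn
```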
@@ -147,27 +161,29 @@ class CompiledFunction(torch.autograd.Function):
         def forward(ctx, *flat_tensor_args):
             nonlocal compiled_fw, compiled_bw, num_outs
             if compiled_fw is None:
-                # Set input tensors that require grad to leaves
-                flat_tensor_args = pytree.tree_map(
-                    lambda x: x.detach().requires_grad_(x.requires_grad), flat_tensor_args
-                )
-                with torch.set_grad_enabled(grad_state):
-                    out = flat_fn(*flat_tensor_args)
-                out = pytree.tree_map(
-                    lambda x: x.detach().contiguous() if isinstance(x, Tensor) else x, out
-                )
+                with preserve_rng_state():
+                    # Set input tensors that require grad to leaves
+                    flat_tensor_args = pytree.tree_map(
+                        lambda x: x.detach().requires_grad_(x.requires_grad), flat_tensor_args
+                    )
+                    with torch.set_grad_enabled(grad_state):
+                        out = flat_fn(*flat_tensor_args)
+                    out = pytree.tree_map(
+                        lambda x: x.detach().contiguous() if isinstance(x, Tensor) else x, out
+                    )

-                if isinstance(out, (list, tuple)):
-                    num_outs = len(out)
-                else:
-                    num_outs = 1
+                    if isinstance(out, (list, tuple)):
+                        num_outs = len(out)
+                    else:
+                        num_outs = 1
+
+                    joint_inputs = (flat_tensor_args, out)
+                    aot_decompositions = {**aot_autograd_decompositions, **decompositions}
+                    with torch.set_grad_enabled(grad_state):
+                        fx_g = make_fx(joint_forward_backward, aot_decompositions)(
+                            *joint_inputs
+                        )

-                joint_inputs = (flat_tensor_args, out)
-                aot_decompositions = {**aot_autograd_decompositions, **decompositions}
-                with torch.set_grad_enabled(grad_state):
-                    fx_g = make_fx(joint_forward_backward, aot_decompositions)(
-                        *joint_inputs
-                    )
                 fw_module, bw_module = partition_fn(fx_g, joint_inputs)
                 # print(fw_module.code, bw_module.code)
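This second hunk moves the trial execution of `flat_fn` and the `make_fx` trace of the joint forward/backward graph under `preserve_rng_state()`, so the extra execution done only for tracing does not advance the random stream that the first real run will consume. A hedged illustration of the effect, reusing the `preserve_rng_state` sketch above, with `nn.Dropout` standing in for a stochastic model being traced:

```python
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)   # stand-in for a model with randomness
x = torch.ones(8)

torch.manual_seed(0)
with preserve_rng_state():
    _ = drop(x)            # "tracing" run: consumes RNG, then state is restored
compiled_run = drop(x)     # the run the user actually observes

torch.manual_seed(0)
plain_run = drop(x)        # what eager mode would produce with no tracing run
assert torch.equal(compiled_run, plain_run)
```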