Merge branch 'main' into add_hardshrink

Lightning-AI · Dec 3, 2024 · 18fcacf
2 parents aadcf6e + 29adb08
commit 18fcacf

Showing 18 changed files with 164 additions and 30 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -27,3 +27,7 @@ updates:
     open-pull-requests-limit: 5
     reviewers:
       - "Borda"
+    groups:
+      GHA-updates:
+        patterns:
+          - "*"
diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml
@@ -11,17 +11,17 @@ concurrency:
 
 jobs:
   precommit-run:
-    uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@v0.11.9
     with:
       python-version: "3.10"
 
   check-schema:
-    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.9
     with:
       azure-dir: ".azure"
 
   check-package:
-    uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.8
+    uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.9
     with:
-      actions-ref: v0.11.8
+      actions-ref: v0.11.9  # keep in sync with the workflow tag in `uses`
       import-name: "thunder"

diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
@@ -138,7 +138,7 @@ jobs:
           coverage xml
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          file: ./coverage.xml
+          files: ./coverage.xml  # v5 deprecates `file` in favor of `files`

diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml
@@ -33,15 +33,15 @@ jobs:
       # We do this, since failures on test.pypi aren't that bad
       - name: Publish to Test PyPI
         if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
-        uses: pypa/gh-action-pypi-publish@v1.11.0
+        uses: pypa/gh-action-pypi-publish@v1.12.2
         with:
           user: __token__
           password: ${{ secrets.test_pypi_password }}
-          repository_url: https://test.pypi.org/legacy/
+          repository-url: https://test.pypi.org/legacy/  # kebab-case input; `repository_url` is deprecated
 
       - name: Publish distribution 📦 to PyPI
         if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
-        uses: pypa/gh-action-pypi-publish@v1.11.0
+        uses: pypa/gh-action-pypi-publish@v1.12.2
         with:
           user: __token__
           password: ${{ secrets.pypi_password }}
diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml
@@ -26,15 +26,15 @@ jobs:
       # We do this, since failures on test.pypi aren't that bad
       - name: Publish to Test PyPI
         if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release'
-        uses: pypa/gh-action-pypi-publish@v1.11.0
+        uses: pypa/gh-action-pypi-publish@v1.12.2
         with:
           user: __token__
           password: ${{ secrets.test_pypi_password }}
-          repository_url: https://test.pypi.org/legacy/
+          repository-url: https://test.pypi.org/legacy/  # kebab-case input; `repository_url` is deprecated
 
       - name: Publish distribution 📦 to PyPI
         if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release'
-        uses: pypa/gh-action-pypi-publish@v1.11.0
+        uses: pypa/gh-action-pypi-publish@v1.12.2
         with:
           user: __token__
           password: ${{ secrets.pypi_password }}
diff --git a/docs/source/reference/transforms/index.rst b/docs/source/reference/transforms/index.rst
@@ -7,3 +7,4 @@ thunder.transforms
     :toctree: generated/
 
     MaterializationTransform
+    ConstantFolding
diff --git a/requirements/docs.txt b/requirements/docs.txt
@@ -1,7 +1,7 @@
 sphinx ==5.3.0
 myst-parser ==1.0.0
 nbsphinx ~=0.9.5
-ipython[all] ~=8.29.0
+ipython[all] ~=8.30.0
 pandoc ==2.4
 docutils >=0.16
 sphinxcontrib-fulltoc ==1.2.0

diff --git a/requirements/notebooks.txt b/requirements/notebooks.txt
@@ -1,4 +1,4 @@
-ipython[all] ~=8.29.0
+ipython[all] ~=8.30.0
 numpy >=1.23.0,<2  # not yet ready for numpy 2
 liger-kernel == 0.4.0
 cuda-python

diff --git a/requirements/test.txt b/requirements/test.txt
@@ -1,6 +1,6 @@
-coverage ~=7.6.0
+coverage ~=7.6.8
 pytest ==8.1.1
-pytest-benchmark ==4.0.0
+pytest-benchmark ==5.1.0
 pytest-timeout ==2.3.1
 pytest-cov ==6.0.0
 pytest-xdist ==3.6.1
@@ -9,7 +9,7 @@ pytest-timestamper ==0.0.10
 graphviz ==0.20.3
 fdm ==0.4.1
 expecttest ==0.2.1  # for test_ddp.py
-hypothesis ~=6.115.0  # for test_ddp.py
+hypothesis ~=6.122.1  # for test_ddp.py
 numpy >=1.23.0,<2  # for test_ops.py; not yet ready for numpy 2
 einops  # for test_einops.py
 litgpt==0.4.11  # for the model definition in tests and benchmarks
@@ -18,7 +18,7 @@ pandas # thunder/benchmarks/test_benchmark_litgpt.py
 xlsxwriter # thunder/benchmarks/test_benchmark_litgpt.py
 jsonargparse # thunder/benchmarks/benchmark_litgpt.py
 bitsandbytes==0.42.0  # fixed version!
-transformers==4.46.2 # for test_networks.py
+transformers==4.46.3 # for test_networks.py
 
 # Installs JAX on Linux and MacOS
 jaxlib; sys_platform == 'linux' or sys_platform == 'darwin'  # required for jax, see https://github.com/google/jax#installation

diff --git a/thunder/__init__.py b/thunder/__init__.py
@@ -678,8 +678,6 @@ def get_computation_and_inputs(*args, **kwargs):
                 )
                 computation_traces.extend(extraces)
                 computation_trc = computation_traces[-1]
-
-            if backward_trc is None:
                 computation_trc = thunder.executors.passes.del_last_used(computation_trc)
 
             if not compile_options.get("disable_inplace_copy_check", False):

diff --git a/thunder/core/interpreter.py b/thunder/core/interpreter.py
@@ -2513,7 +2513,9 @@ def __getitem__(self, key):
         except Exception as e:
             return do_raise(e)
 
-        populate_single_dict_item_wrapper(uv, self, key.value)
+        from thunder.core.proxies import Proxy
+
+        populate_single_dict_item_wrapper(uv, self, key if isinstance(key.value, Proxy) else key.value)
         v = self.item_wrappers[key.value]
         assert uv is v.value or uv is v.original_value, f"value for {key.value} out of sync {uv} {v.value}"
         return v

diff --git a/thunder/dynamo/splitter.py b/thunder/dynamo/splitter.py
@@ -138,6 +138,13 @@ def callback(node) -> int:
     original_split_gm: torch.fx.GraphModule = split_module(
         gm, root_m=None, split_callback=callback, keep_original_order=True, keep_original_node_name=True
     )
+
+    # Workaround for the Torch bug https://github.com/pytorch/pytorch/pull/139275
+    for submodule in original_split_gm.children():
+        if not submodule.graph.find_nodes(op="output"):
+            submodule.graph.output(())
+    if not original_split_gm.graph.find_nodes(op="output"):
+        original_split_gm.graph.output(())
     split_gm = copy.deepcopy(original_split_gm)
 
     def is_thunder_supported_partition(node: torch.fx.Node) -> bool:

diff --git a/thunder/tests/test_dynamo.py b/thunder/tests/test_dynamo.py
@@ -449,10 +449,6 @@ def func(x):
             IS_WINDOWS,
             reason="torch.compile Windows support is still WIP - https://github.com/pytorch/pytorch/issues/122094",
         ),
-        pytest.mark.skipif(
-            LooseVersion(torch.__version__) < LooseVersion("2.6.0"),
-            reason="Skip until the Torch bug is fixed - https://github.com/pytorch/pytorch/pull/139275",
-        ),
         pytest.mark.skipif(
             version_between(torch.__version__, min_ver="2.6.0dev0", max_ver="2.6.0a99"),
             reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
@@ -864,3 +860,28 @@ def forward(self, x):
     cmd = "pytest" if use_pytest_benchmark else "python"
     result1 = run([cmd, s1], capture_output=True, text=True)
     assert result1.returncode == 0, f"Reproducer {s1} failed with return code {result1.returncode}"
+
+
+def test_deepcopy_graph_module():
+    """The splitter's original split GraphModule must have output nodes and be deep-copyable."""
+    import copy
+
+    import thunder
+
+    class MyModule(torch.nn.Module):
+        def forward(self, x):
+            y = x + 1  # no return value
+
+    gm = torch.fx.symbolic_trace(MyModule())
+    # Erase the output node so the graph reaches the splitter without one.
+    output_node = gm.graph.find_nodes(op="output")[0]
+    gm.graph.erase_node(output_node)
+
+    _, subgraph_info = thunder.dynamo.splitter._splitter(gm, thunder.jit, thunder.jit, [])
+    original_split_gm = subgraph_info.original_split_graph_module
+    # Both the parent graph and every child graph are expected to carry an output node.
+    assert original_split_gm.graph.find_nodes(op="output")
+    for subm in original_split_gm.children():
+        assert subm.graph.find_nodes(op="output")
+    # Must complete without an assertion error; the copy itself is not needed.
+    copy.deepcopy(original_split_gm)
diff --git a/thunder/tests/test_jit_general.py b/thunder/tests/test_jit_general.py
@@ -680,6 +680,8 @@ def test_litgpt_variants(name, device):
 
     if device == "cuda" and not torch.cuda.is_available():
         pytest.skip("CUDA not available")
+    if device == "cuda" and name == "falcon-40b-like":
+        pytest.skip("NVFuser reenable when https://github.com/NVIDIA/Fuser/issues/3505 is fixed, Thunder issue #1504")
     if device == "cuda" and name == "falcon-7b-like":
         pytest.skip("NVFuser reenable when https://github.com/NVIDIA/Fuser/issues/3292 is fixed")
 
@@ -783,7 +785,7 @@ def sample(logits):
     ("cpu", "cuda"),
 )
 def test_tom_overrides_proxy(device):
-    from litgpt.config import Config
+    from thunder.tests.litgpt_model import Config
     from litgpt.model import GPT
 
     if device == "cuda" and not torch.cuda.is_available():
@@ -1027,7 +1029,6 @@ def forward(self, x):
     ids=("remove_duplicate=False", "remove_duplicate=True"),
 )
 def test_named_params_and_named_buffers(prefix, recurse, remove_duplicate):
-
     buffer_tensor = torch.tensor([1.0])
 
     class SubMod(torch.nn.Module):
@@ -1141,7 +1142,6 @@ def test_custom_autograd_function():
     from torch.testing._internal.common_utils import gradcheck
 
     class MyFunction(torch.autograd.Function):
-
         @staticmethod
         def forward(ctx, x: torch.Tensor) -> torch.Tensor:
             return x * 2.0
@@ -1204,7 +1204,6 @@ def forward(self, x):
 
 
 def test_autograd_function_apply():
-
     def forward(ctx, x):
         saved_for_backward = (x,)
         return x.sin(), saved_for_backward
@@ -1273,7 +1272,6 @@ def my_sin_with_wrong_backward(x):
 
 
 def test_autograd_function_empty_forward():
-
     class Fn(torch.autograd.Function):
         @staticmethod
         def forward(self, x):
@@ -1462,3 +1460,30 @@ def foo(a):
     expected = foo(a)
 
     assert_close(actual, expected)
+
+
+def test_cache_symbolic_values_dict():
+    """Jitted dict indexing with ``cache="symbolic values"`` must match eager for int and str keys."""
+
+    def foo(a, v):
+        return a[v].relu()
+
+    jitted = thunder.jit(foo, cache="symbolic values")
+
+    # Case 1: dictionary with integer keys.
+    int_keyed = {
+        2: torch.randn(2, 3, 8, requires_grad=True, device="cpu"),
+        5: torch.randn(4, 8, requires_grad=True, device="cpu"),
+    }
+    jit_out = jitted(int_keyed, 2)
+    eager_out = foo(int_keyed, 2)
+    assert_close(jit_out, eager_out)
+
+    # Case 2: dictionary with string keys, passed to the same jitted callable.
+    str_keyed = {
+        "a": torch.randn(2, 8, requires_grad=True, device="cpu"),
+        "b": torch.randn(7, requires_grad=True, device="cpu"),
+    }
+    jit_out = jitted(str_keyed, "b")
+    eager_out = foo(str_keyed, "b")
+    assert_close(jit_out, eager_out)
diff --git a/thunder/tests/test_torch_compile_executor.py b/thunder/tests/test_torch_compile_executor.py
@@ -20,6 +20,7 @@ def test_supported_ops_are_in_pytorch_executor():
 # appropriate visual studio config.
 @pytest.mark.skipif(not is_inductor_supported() or platform.system() == "Windows", reason="inductor unsupported")
 def test_torch_compile_litgpt():
+    from thunder.tests.litgpt_model import Config
     from litgpt.model import GPT
 
     model = GPT.from_name("llama1-like", n_layer=1)
@@ -40,7 +41,7 @@ def test_torch_compile_litgpt():
 @requiresCUDA
 @pytest.mark.skipif(not device_supports_bf16(torch.device("cuda")), reason="bf16 is not supported")
 def test_torch_compile_cat_nvfuser_phi2_tanh():
-    from litgpt.config import Config
+    from thunder.tests.litgpt_model import Config
     from litgpt.model import GPT
 
     device = torch.device("cuda")

diff --git a/thunder/tests/test_transforms.py b/thunder/tests/test_transforms.py
@@ -74,7 +74,8 @@ def _test_equal_nvtx_push_and_pop(trc):
 @requiresCUDA
 def test_materialization():
     from thunder.transforms import MaterializationTransform
-    from litgpt.config import Config
+    from thunder.tests.litgpt_model import Config
+
     from litgpt.model import GPT
 
     config = Config.from_name("llama2-like")
@@ -121,7 +122,7 @@ def test_materialization():
 def test_quantization_on_meta():
     from thunder.transforms import MaterializationTransform
     from thunder.transforms.quantization import BitsAndBytesLinearQuant4bit, get_bitsandbytes_executor
-    from litgpt.config import Config
+    from thunder.tests.litgpt_model import Config
     from litgpt.model import GPT
 
     bitsandbytes_executor = get_bitsandbytes_executor()

diff --git a/thunder/transforms/__init__.py b/thunder/transforms/__init__.py
@@ -1,8 +1,10 @@
+from .constant_folding import ConstantFolding
 from .materialization import MaterializationTransform
 from .qlora import LORATransform
 
 
 __all__ = [
-    "MaterializationTransform",
+    "ConstantFolding",
     "LORATransform",
+    "MaterializationTransform",
 ]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,3 +7,4 @@ thunder.transforms
		:toctree: generated/

		MaterializationTransform
		ConstantFolding