Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into benchmark-litgpt-default-executors
Browse files Browse the repository at this point in the history
  • Loading branch information
IvanYashchuk committed Aug 21, 2024
2 parents f1fadaa + 3548ba8 commit 1781d58
Show file tree
Hide file tree
Showing 21 changed files with 887 additions and 528 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ The easiest way to get started with Thunder, requiring no extra installations or

To use Thunder on your local machine:

- install [nvFuser](https://github.com/NVIDIA/Fuser) nightly and PyTorch nightly together as follows:
- install [nvFuser](https://github.com/NVIDIA/Fuser) and PyTorch stable together as follows:

```bash
# install nvFuser which installs the matching nightly PyTorch
pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
# install nvFuser which installs the matching stable PyTorch
pip install --pre nvfuser-cu121-torch24
```

- install [cudnn](https://github.com/NVIDIA/cudnn-frontend) as follows:
Expand Down Expand Up @@ -107,8 +107,8 @@ pip install lightning-thunder
Alternatively, you can install the latest version of Thunder directly from this GitHub repository as follows:

```
# 1) Install nvFuser and PyTorch nightly dependencies:
pip install --pre 'nvfuser-cu121[torch]' --extra-index-url https://pypi.nvidia.com
# 1) Install nvFuser and PyTorch dependencies:
pip install --pre nvfuser-cu121-torch24
```

```bash
Expand Down
7 changes: 4 additions & 3 deletions docs/source/fundamentals/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ Minimal dependencies

Follow these instructions to install PyTorch, nvFuser, and finally Thunder.

Install PyTorch and nvFuser with pip (command shown is for CUDA 12.1)::
Install PyTorch and nvFuser with pip (command shown is for CUDA 12.1 and PyTorch 2.4.x)::

pip install --pre "nvfuser-cu121[torch]" --extra-index-url https://pypi.nvidia.com
pip install --pre nvfuser-cu121-torch24

cu121 can be replaced with cu118 depending on your CUDA version.
cu121 can be replaced with cu118 depending on your CUDA version. nvFuser builds typically support the latest point release of PyTorch stable versions.
For torch 2.4, cu124 is also supported. For nightly versions and more detailed instructions, please see https://github.com/NVIDIA/Fuser/#installation

You're all set with minimal dependencies, so you can follow `Install Thunder`_.

Expand Down
34 changes: 16 additions & 18 deletions thunder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ def _alias_tensor_of_args_kwargs(*args, **kwargs) -> str:
return ""
return "-".join(alias_indices)

@langctxs.langctx(cd.langctx)
@_with_cache_info_ctx
def get_computation_and_inputs(*args, **kwargs):
# set up a record of things in the current environment that impact caching / prologues
Expand Down Expand Up @@ -530,16 +531,15 @@ def get_computation_and_inputs(*args, **kwargs):
# returns the (proxied) result of the operation
cs.last_trace_tracing_start = time.perf_counter_ns()

with langctxs.langctx(cd.langctx):
prologue_trc: TraceCtx
computation_trc: TraceCtx
jit_results: TraceResults = interpreter(
fn, args, kwargs, record_history=record_history, sharp_edges=cd.sharp_edges
)
prologue_trc = jit_results.prologue_trace
computation_trc = jit_results.computation_trace
epilogue_trc = jit_results.epilogue_trace
last_interpreter_log = jit_results.interpreter_log
prologue_trc: TraceCtx
computation_trc: TraceCtx
jit_results: TraceResults = interpreter(
fn, args, kwargs, record_history=record_history, sharp_edges=cd.sharp_edges
)
prologue_trc = jit_results.prologue_trace
computation_trc = jit_results.computation_trace
epilogue_trc = jit_results.epilogue_trace
last_interpreter_log = jit_results.interpreter_log

prologue_traces = [prologue_trc]
computation_traces = [computation_trc]
Expand Down Expand Up @@ -666,19 +666,17 @@ def get_computation_and_inputs(*args, **kwargs):
from thunder.executors.passes import _transform_for_operator_executor_execution
from thunder.distributed.utils import maybe_sort_waits

with langctxs.langctx(cd.langctx):
tmp_comp_trc = _transform_for_operator_executor_execution(computation_trc, cd.executors_list)
tmp_comp_trc = _transform_for_operator_executor_execution(computation_trc, cd.executors_list)
is_transformed, tmp_comp_trc = maybe_sort_waits(tmp_comp_trc)
if is_transformed:
computation_trc = tmp_comp_trc
computation_traces.append(computation_trc)

with langctxs.langctx(cd.langctx):
extraces = transform_for_execution(
computation_trc,
executors_list=cd.executors_list,
use_del_last_used=False,
)
extraces = transform_for_execution(
computation_trc,
executors_list=cd.executors_list,
use_del_last_used=False,
)
computation_traces.extend(extraces)
computation_trc = computation_traces[-1]

Expand Down
12 changes: 8 additions & 4 deletions thunder/core/langctxs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,13 @@ def set_langctx(ctx: LanguageContext, /) -> Any:


def get_langctx() -> LanguageContext:
    """Gets the current language context, defaulting to the torch language.

    Returns:
        The :class:`LanguageContext` currently set on the context variable,
        or the registered torch language context when none has been set.

    Raises:
        LookupError: if no context is set and the torch language context has
            not been registered yet (see ``resolve_language``).
    """
    # The default is supplied to ContextVar.get() here rather than in the
    # ContextVar constructor, because the torch language context is not yet
    # registered at the time the ContextVar is created.
    return _langctx.get(resolve_language(Languages.TORCH))


def reset_langctx(token: Any, /) -> None:
Expand Down Expand Up @@ -94,7 +98,7 @@ def resolve_language(id: Any, /) -> LanguageContext:
lang: None | LanguageContext = _langctx_registry.get(id, None)

if lang is None:
raise ValueError(f"Unknown language context {id}")
raise LookupError(f"Unknown language context {id}")

return lang

Expand Down
2 changes: 1 addition & 1 deletion thunder/core/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _get_shared_names(self):
self.named_parameters(remove_duplicate=False), self.named_buffers(remove_duplicate=False)
):
parameters_to_names.setdefault(v, set()).add(name)
shared_names = {}
shared_names: dict[str, set[str]] = {}
for s in parameters_to_names.values():
for n in s:
shared_names[n] = s
Expand Down
17 changes: 17 additions & 0 deletions thunder/core/proxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,23 @@ def __getattr__(self, attr: str, /):
baseutils.check(method_or_value is not None, lambda: f"Unknown attribute {attr}", exception_type=AttributeError)

if callable(method_or_value):
# TODO: This is a temporary fix to allow for the `numel` attribute
# to be called without arguments. This is a workaround for the fact
# that the `numel` was initially in Thunder introduced not as a
# method but a property. Now a lot of code relies on it being a
# property. But PyTorch uses it as a method. We need to converge on
# one or the other.
# https://github.com/Lightning-AI/lightning-thunder/issues/925
class _Numel(int):
def __new__(cls, value):
assert isinstance(value, int), f"Expected int, got {type(value)}"
return int.__new__(cls, value)

def __call__(self):
return int(self)

if attr == "numel":
return _Numel(self._numel)
return partial(method_or_value, self)

return method_or_value
Expand Down
Loading

0 comments on commit 1781d58

Please sign in to comment.