Merge branch 'main' into tfogal/nvtx

Lightning-AI · Oct 21, 2024 · 364da54 · 364da54
2 parents 66753b0 + 79e59d0
commit 364da54
Show file tree

Hide file tree

Showing 26 changed files with 438 additions and 150 deletions.
diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml
@@ -84,7 +84,6 @@ jobs:
             pytest thunder/tests/ \
               -m "not standalone" \
               -v --datefmt="%Y%m%d-%H:%M:%S.%f" \
-              --timeout=240 \
               --random-order-seed=42 \
               --durations=250 \
               --timeout=240 \
@@ -97,7 +96,7 @@ jobs:
           ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
             --flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure
         condition: ne(variables['testing'], 'distributed')
-        timeoutInMinutes: "30"
+        timeoutInMinutes: "40"
         displayName: "Testing: regular"
 
       - bash: |

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -8,7 +8,7 @@ ci:
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
@@ -23,7 +23,7 @@ repos:
       - id: detect-private-key
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.17.0
+    rev: v3.18.0
     hooks:
       - id: pyupgrade
         args: ["--py310-plus"]
@@ -38,14 +38,14 @@ repos:
         #args: ["--write-changes"] # uncomment if you want to get automatic fixing
 
   - repo: https://github.com/psf/black
-    rev: 24.8.0
+    rev: 24.10.0
     hooks:
       - id: black
         name: Black code
         exclude: "examples"
 
   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.17
+    rev: 0.7.18
     hooks:
       - id: mdformat
         additional_dependencies:
@@ -55,7 +55,7 @@ repos:
         exclude: "examples"
 
   - repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.9.1
+    rev: v1.0.0
     hooks:
       - id: sphinx-lint
 

diff --git a/README.md b/README.md
@@ -73,12 +73,18 @@ The easiest way to get started with Thunder, requiring no extra installations or
 
 ## Install Thunder
 
-To use Thunder on your local machine:
+Thunder is in alpha and the latest development is happening on the `main` branch. You can install the latest version of Thunder from the `main` branch as follows:
 
-- install [nvFuser](https://github.com/NVIDIA/Fuser) and PyTorch stable together as follows:
+```bash
+pip install git+https://github.com/Lightning-AI/lightning-thunder.git@main
+```
+
+To achieve the best performance, you can install Thunder with the following additional dependencies:
+
+- install nightly [nvFuser](https://github.com/NVIDIA/Fuser) built for PyTorch 2.4 as follows:
 
 ```bash
-# install nvFuser which installs the matching stable PyTorch
+# install nvFuser built for the matching stable PyTorch
 pip install --pre nvfuser-cu121-torch24
 ```
 
@@ -89,35 +95,12 @@ pip install --pre nvfuser-cu121-torch24
 pip install nvidia-cudnn-frontend
 ```
 
-- Finally, install Thunder as follows:
-
-```
-# install thunder
-pip install lightning-thunder
-```
-
 <details>
   <summary>Advanced install options</summary>
     <!-- following section will be skipped from PyPI description -->
 
 &#160;
 
-### Install from main
-
-Alternatively, you can install the latest version of Thunder directly from this GitHub repository as follows:
-
-```
-# 1) Install nvFuser and PyTorch dependencies:
-pip install --pre nvfuser-cu121-torch24
-```
-
-```bash
-# 2) Install Thunder itself
-pip install git+https://github.com/Lightning-AI/lightning-thunder.git
-```
-
-&#160;
-
 ### Install to tinker and contribute
 
 If you are interested in tinkering with and contributing to Thunder, we recommend cloning the Thunder repository and installing it in pip's editable mode:

diff --git a/requirements/test.txt b/requirements/test.txt
@@ -23,3 +23,6 @@ transformers==4.43.3 # for test_networks.py
 # Installs JAX on Linux and MacOS
 jaxlib; sys_platform == 'linux' or sys_platform == 'darwin'  # required for jax, see https://github.com/google/jax#installation
 jax; sys_platform == 'linux' or sys_platform == 'darwin'  # for test_ops.py
+
+asvdb @ git+https://github.com/rapidsai/asvdb.git
+asv >=0.6.4
diff --git a/thunder/benchmarks/conftest.py b/thunder/benchmarks/conftest.py
@@ -0,0 +1,82 @@
+import os
+import platform
+import psutil
+from typing import Any
+import warnings
+import importlib.util
+
+
+def pytest_addoption(parser):
+    """Register the ``--asv_bench_dir`` command-line option.
+
+    The option names the directory in which benchmark results are stored in asv
+    format. Its default is taken from the ``THUNDER_BENCH_DIR`` environment
+    variable; if that is unset the default is ``None`` and results won't be
+    saved in asv.
+    """
+    fallback_dir = os.environ.get("THUNDER_BENCH_DIR")
+    parser.addoption("--asv_bench_dir", action="store", default=fallback_dir)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    """Export the benchmark results in asv format when the pytest session ends.
+
+    Results are saved only if the pytest session was a benchmark, which is
+    detected by the presence of ``_benchmarksession`` on the session config.
+    ``exitstatus`` is not used.
+    """
+    config = session.config
+    if not hasattr(config, "_benchmarksession"):
+        return
+    save_benchmark_results_asv(config)
+
+
+def sanitize_params(benchmark_params: list[tuple[str, Any]]) -> list[tuple[str, Any]]:
+    """Return the benchmark params with serialization noise removed from the executor entry.
+
+    E.g. given ``'<function torch_executor at 0xffffffffff>'`` as the value of the
+    ``"executor"`` param, the returned value is ``'torch_executor'``. All other params
+    are passed through unchanged and the input order is preserved.
+
+    Args:
+        benchmark_params: ``(name, value)`` pairs; any iterable of pairs works,
+            including ``dict.items()``.
+
+    Returns:
+        A new list of ``(name, value)`` pairs.
+    """
+    sane_params = []
+    for k, v in benchmark_params:
+        if k == "executor":
+            text = str(v)
+            tokens = text.split()
+            # A function repr reads '<function NAME at 0x...>', so NAME is the second token.
+            # A value whose text has fewer than two tokens (e.g. a plain string name) is
+            # kept as its full text instead of raising IndexError.
+            v = tokens[1] if len(tokens) > 1 else text
+        sane_params.append((k, v))
+    return sane_params
+
+
+def save_benchmark_results_asv(config):
+    """Save the benchmark results after a pytest session in the asv format.
+
+    User must specify the --asv_bench_dir flag to store the results. A warning is
+    emitted and nothing is saved when the ``asv`` or ``asvdb`` package cannot be
+    found, or when no output directory is set.
+
+    Args:
+        config: the pytest config object; ``config.option.asv_bench_dir`` and
+            ``config._benchmarksession.benchmarks`` are read from it.
+    """
+
+    bench_dir = config.option.asv_bench_dir
+
+    # The writer used below is imported from asvdb, so asvdb has to be checked in
+    # addition to asv; otherwise a missing asvdb raises ImportError at session end.
+    missing = [pkg for pkg in ("asv", "asvdb") if not importlib.util.find_spec(pkg)]
+    if missing:
+        warnings.warn(
+            f"Required package(s) not available: {', '.join(missing)}. Results won't be saved in asv format."
+        )
+        return
+
+    if not bench_dir:
+        warnings.warn("'asv_bench_dir' is not set. Results won't be saved in asv format.")
+        return
+
+    from asvdb import utils, ASVDb, BenchmarkResult, BenchmarkInfo
+
+    benchmarks = config._benchmarksession.benchmarks
+
+    # Get system information to store alongside the results.
+    uname = platform.uname()
+    commit_hash, commit_time = utils.getCommitInfo()
+    repo_name, current_branch = utils.getRepoInfo()
+    python_version = platform.python_version()
+    memory_size = str(psutil.virtual_memory().total)
+
+    # NOTE(review): machineName and arch both receive uname.machine. If a host name
+    # was intended, uname.node would provide it - confirm before changing, since it
+    # may affect how already-stored results are keyed.
+    bench_info = BenchmarkInfo(
+        machineName=uname.machine,
+        osType=f"{uname.system} {uname.release}",
+        pythonVer=python_version,
+        commitHash=commit_hash,
+        commitTime=commit_time,
+        cpuType=uname.processor,
+        arch=uname.machine,
+        ram=memory_size,
+    )
+
+    # Create the asv result database.
+    db = ASVDb(dbDir=bench_dir, repo=repo_name, branches=[current_branch])
+
+    # Add all the benchmarks to the database.
+    for bench in benchmarks:
+        # Keep only the text before the first "[" (presumably the parametrization
+        # suffix of the test id - confirm); the params are stored separately below.
+        name = bench.name.split("[")[0]
+        params_pairs = sanitize_params(bench.params.items())
+        result = BenchmarkResult(
+            funcName=name,
+            argNameValuePairs=params_pairs,
+            # Scaled by 1e6 to match the unit label (presumably the median is
+            # reported in seconds - confirm).
+            result=bench.stats.median * 1e6,
+            unit="µseconds",
+        )
+        db.addResult(bench_info, result)
diff --git a/thunder/common.py b/thunder/common.py
@@ -303,7 +303,7 @@ def translate(x: Any, *, name: str | None = None) -> Any:
         if isinstance(x, Proxy):
             # register proxy name used by NumberProxies in TensorProxy.shape
             if isinstance(x, TensorProxy):
-                for s_p in filter(lambda s: isinstance(s, Proxy), x.shape):
+                for s_p in filter(lambda s: isinstance(s, Proxy), x._shape):
                     # TODO need to avoid name conflict here, since s_p.name
                     # could have conflicted with something defined earlier in
                     # the trace.