Commit

Merge branch 'main' into tom/line-breaks
t-vi authored Mar 22, 2024
2 parents fe131af + 5f6d3d3 commit 7fb90bd
Showing 30 changed files with 818 additions and 743 deletions.
24 changes: 24 additions & 0 deletions .azure/gpu-tests.yml
@@ -65,8 +65,14 @@ jobs:
# drop pt from requirements so not to interfere with the existing one
bash .azure/remove-torch-lines.sh requirements/base.txt
cat requirements/base.txt
# double check on test requirements
pip install -r requirements/test.txt
# https://docs.codecov.com/docs/codecov-uploader
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
# install this package
python setup.py develop
displayName: 'Install package & ...'
@@ -85,6 +91,12 @@ jobs:
--durations=250 \
--numprocesses=9 \
--ignore=thunder/tests/distributed --ignore=thunder/tests/test_networks.py
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
displayName: 'Testing: regular'
@@ -95,6 +107,12 @@ jobs:
thunder/tests/test_networks.py \
-m "not standalone" \
-v --random-order-seed=42 --durations=0 --numprocesses=3
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,networks --name="GPU-coverage" --env=linux,azure
condition: ne(variables['testing'], 'distributed')
displayName: 'Testing: networks'
@@ -108,6 +126,12 @@ jobs:
- bash: |
# run all found tests in given past as standalone
bash scripts/run_standalone_tests.sh "thunder/tests/distributed"
# compile coverage results
python -m coverage report
python -m coverage xml
# upload to codecov
./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
--flags=gpu,pytest,distributed --name="GPU-coverage" --env=linux,azure
condition: eq(variables['testing'], 'distributed')
displayName: 'Testing: distributed'
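The steps above now finish each GPU test run by consolidating coverage data and uploading it with the standalone Codecov uploader fetched in the install step. A minimal local sketch of that flow, assuming a `.coverage` file already exists in the working directory and `CODECOV_TOKEN` is exported (both assumptions, not taken verbatim from the pipeline):

```bash
# Summarize and export coverage results, as the CI steps above do.
python -m coverage report
python -m coverage xml

# Download the standalone Codecov uploader and send coverage.xml,
# tagged the same way as the "regular" GPU job.
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
./codecov --token="${CODECOV_TOKEN}" --flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure
```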
5 changes: 4 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -1,10 +1,13 @@
# Before submitting
<details>
<summary><b>Before submitting</b></summary>

- [ ] Was this discussed/approved via a Github issue? (no need for typos and docs improvements)
- [ ] Did you read the [contributor guideline](https://github.com/Lightning-AI/pytorch-lightning/blob/main/.github/CONTRIBUTING.md), Pull Request section?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?

</details>

## What does this PR do?

Fixes # (issue).
18 changes: 9 additions & 9 deletions .github/workflows/ci-testing.yml
@@ -114,15 +114,15 @@ jobs:
coverage report
coverage xml
#- name: Upload coverage to Codecov
# uses: codecov/codecov-action@v3
# with:
# token: ${{ secrets.CODECOV_TOKEN }}
# file: ./coverage.xml
# flags: unittests
# env_vars: OS,PYTHON
# name: codecov-umbrella
# fail_ci_if_error: false
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false


testing-guardian:
6 changes: 3 additions & 3 deletions .github/workflows/docs-build.yml
@@ -14,7 +14,7 @@ defaults:
shell: bash

jobs:
build-docs:
docs-make:
uses: Lightning-AI/utilities/.github/workflows/check-docs.yml@v0.11.0
with:
python-version: "3.10"
@@ -28,7 +28,7 @@ jobs:
env:
GCP_TARGET: "gs://lightning-docs-thunder"
steps:
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@v3
with:
name: docs-html-${{ github.sha }}
path: docs/build/
@@ -50,7 +50,7 @@ jobs:

# Uploading docs to GCS, so they can be served on lightning.ai
- name: Upload docs/thunder/latest to GCS 🪣
if: github.ref == 'refs/heads/master'
if: github.ref == 'refs/heads/main'
run: gsutil -m rsync -d -R docs/build/html/ ${GCP_TARGET}/latest

# Uploading docs to GCS, so they can be served on lightning.ai
3 changes: 3 additions & 0 deletions .pre-commit-config.yaml
@@ -17,6 +17,9 @@ repos:
- id: check-toml
- id: check-json
- id: check-added-large-files
with:
maxkb: 250
enforce-all: true
- id: check-docstring-first
- id: detect-private-key

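The `check-added-large-files` hook is now configured to reject any file above 250 kB, enforced across all files. To exercise just that hook locally, a rough sketch (assumes pre-commit is installed and its hooks are set up in this clone):

```bash
# Run only the large-file check over every tracked file in the repository.
pre-commit run check-added-large-files --all-files
```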
3 changes: 2 additions & 1 deletion README.md
@@ -1,5 +1,6 @@
<div align="center">
<img alt="Thunder" src="docs/source/_static/images/lightning_thunder_lightmode_nobyline.png" width="400px" style="max-width: 100%;">
<img alt="Thunder" src="docs/source/_static/images/LightningThunderLightModewByline.png#gh-light-mode-only" width="400px" style="max-width: 100%;">
<img alt="Thunder" src="docs/source/_static/images/LightningThunderDarkModewByline.png#gh-dark-mode-only" width="400px" style="max-width: 100%;">
<br/>
<br/>

30 changes: 21 additions & 9 deletions dockers/ubuntu-cuda/Dockerfile
@@ -24,6 +24,7 @@ ARG CUDNN_FRONTEND_CHECKOUT="v1.1.0"
ARG PYTHON_VERSION="3.10"
ARG TORCH_VERSION="2.2.1"
ARG TRITON_VERSION="2.2.0"
ARG TORCH_INSTALL="stable"

SHELL ["/bin/bash", "-c"]
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
@@ -96,7 +97,7 @@ ENV \
TORCH_CUDA_ARCH_LIST="8.0" \
CUDA_SELECT_NVCC_ARCH_FLAGS="8.0"

ARG TORCH_INSTALL="wheel"
ARG TORCH_INSTALL

RUN \
if [ "${TORCH_INSTALL}" == "source" ]; then \
@@ -122,15 +123,26 @@ RUN \
--index-url="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM//'.'/''}"; \
fi

ARG TORCH_INSTALL

RUN \
# building nvFuser from source
git clone https://github.com/NVIDIA/Fuser.git && \
cd Fuser && \
git submodule update --init --recursive && \
pip install -r requirements.txt && \
python setup.py install --no-test --no-benchmark && \
cd .. && \
rm -rf Fuser
if [ "${TORCH_INSTALL}" == "source" ]; then \
# building nvFuser from source
git clone https://github.com/NVIDIA/Fuser.git && \
cd Fuser && \
git submodule update --init --recursive && \
pip install -r requirements.txt && \
python setup.py install --no-test --no-benchmark && \
cd .. && \
rm -rf Fuser ; \
elif [ "${TORCH_INSTALL}" == "test" ]; then \
echo "Not supported option" ; \
else \
# installing pytorch from wheels \
CUDA_VERSION_MM=${CUDA_VERSION%.*} && \
TORCH_VERSION_MM=${TORCH_VERSION%.*} && \
pip install -U "nvfuser-cu${CUDA_VERSION_MM/./}-torch${TORCH_VERSION_MM/./}" ; \
fi

RUN \
ls -lh requirements/ && \
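With `TORCH_INSTALL` promoted to a top-level build argument and reused in the nvFuser step, one Dockerfile now covers both the from-source build and the wheel-based install. A hypothetical local build, illustrative only (the image tags are made up; only the Dockerfile path and the build argument come from the diff above):

```bash
# Build with PyTorch and nvFuser compiled from source.
docker build -f dockers/ubuntu-cuda/Dockerfile \
  --build-arg TORCH_INSTALL="source" \
  -t lightning-thunder:cuda-source .

# Build with the prebuilt PyTorch and nvFuser wheels (the default, TORCH_INSTALL="stable").
docker build -f dockers/ubuntu-cuda/Dockerfile \
  --build-arg TORCH_INSTALL="stable" \
  -t lightning-thunder:cuda-wheels .
```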
Binary file modified docs/source/_static/images/training_throughput_single.png
7 changes: 6 additions & 1 deletion docs/source/conf.py
@@ -92,7 +92,7 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
"sphinx.ext.linkcode",
"sphinx.ext.autosummary",
"sphinx.ext.napoleon",
"sphinx.ext.imgmath",
"sphinx.ext.mathjax",
"myst_parser",
"nbsphinx",
"sphinx_autodoc_typehints",
@@ -209,6 +209,11 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
(master_doc, project + ".tex", project + " Documentation", author, "manual"),
]

# MathJax configuration
mathjax3_config = {
"tex": {"packages": {"[+]": ["ams", "newcommand", "configMacros"]}},
}

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
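Replacing `sphinx.ext.imgmath` with `sphinx.ext.mathjax` renders equations in the browser instead of as pre-generated images, and the new `mathjax3_config` loads the `ams`, `newcommand`, and `configMacros` TeX packages. A quick local check of the rendered docs, sketched under the assumption that the docs requirements live at `requirements/docs.txt` (a path not shown in this diff):

```bash
# Rebuild the HTML docs and inspect a page containing math in a browser.
pip install -r requirements/docs.txt   # assumption: docs requirements path
sphinx-build -b html docs/source docs/build/html
```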
2 changes: 1 addition & 1 deletion notebooks/dev_tutorials/fsdp_tutorial.ipynb
@@ -1764,7 +1764,7 @@
"%%writefile thunder_fsdp_simple_example.py\n",
"\n",
"# imports\n",
"from thunder.tests.lit_gpt_model import GPT, Config\n",
"from thunder.tests.litgpt_model import GPT, Config\n",
"import torch\n",
"import torch.distributed\n",
"import thunder\n",
18 changes: 9 additions & 9 deletions notebooks/zero_to_thunder.ipynb
@@ -312,8 +312,8 @@
}
],
"source": [
"from lit_gpt import GPT\n",
"from thunder.tests.lit_gpt_model import Config\n",
"from litgpt import GPT\n",
"from thunder.tests.litgpt_model import Config\n",
"cfg = Config.from_name('Llama-2-7b-hf')\n",
"cfg.n_layer = 16 # fewer layers\n",
"torch.set_default_dtype(torch.bfloat16)\n",
@@ -3326,7 +3326,7 @@
],
"source": [
"%%writefile zero_to_thunder_fsdp_simple_example.py\n",
"from thunder.tests.lit_gpt_model import GPT, Config\n",
"from thunder.tests.litgpt_model import GPT, Config\n",
"import os\n",
"import torch, torch.distributed\n",
"import thunder, thunder.distributed\n",
@@ -3470,7 +3470,7 @@
},
"outputs": [],
"source": [
"import lit_gpt\n",
"import litgpt\n",
"def apply_rope_copy(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor:\n",
" head_size = x.size(-1)\n",
" x1 = x[..., : head_size // 2] # (B, nh, T, hs/2)\n",
@@ -3493,7 +3493,7 @@
"\n",
"Say we have a function `apply_rope` applying the RoPE transformation in PyTorch.\n",
"\n",
"In thunder, we define a *meta* function that only defines the metadata (like shapes) of outputs and the actual implementation for each operator and then register the pair with our executor using the `register_operator` function and tell it to use the new symbol instead of the original function `lit_gpt.model.apply_rope`.\n"
"In thunder, we define a *meta* function that only defines the metadata (like shapes) of outputs and the actual implementation for each operator and then register the pair with our executor using the `register_operator` function and tell it to use the new symbol instead of the original function `litgpt.model.apply_rope`.\n"
]
},
{
@@ -3504,17 +3504,17 @@
"outputs": [],
"source": [
"import torch, thunder\n",
"from thunder.tests.lit_gpt_model import GPT\n",
"from thunder.tests.litgpt_model import GPT\n",
"from thunder import TensorProxy\n",
"\n",
"def apply_rope_impl(x: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor) -> torch.Tensor:\n",
" return lit_gpt.model.apply_rope(x, cos, sin)\n",
" return litgpt.model.apply_rope(x, cos, sin)\n",
"\n",
"def apply_rope_meta(x: TensorProxy, cos: TensorProxy, sin: TensorProxy) -> TensorProxy:\n",
" return TensorProxy(like=x)\n",
"\n",
"apply_rope = my_ex.register_operator('apply_rope', like=apply_rope_meta, fn=apply_rope_impl,\n",
" replaces=lit_gpt.model.apply_rope)"
" replaces=litgpt.model.apply_rope)"
]
},
{
@@ -3569,7 +3569,7 @@
"with torch.device('cuda'): m = GPT.from_name('llama2-like'); Q = torch.randn(2, 128, 4096, 16)\n",
"\n",
"def test_apply_rope(x, m):\n",
" return lit_gpt.model.apply_rope(x, m.cos, m.sin)\n",
" return litgpt.model.apply_rope(x, m.cos, m.sin)\n",
"\n",
"thunder_apply_rope = thunder.jit(test_apply_rope, executors=(my_ex,) + thunder.get_default_executors()) \n",
"\n",
2 changes: 2 additions & 0 deletions requirements/notebooks.txt
@@ -1 +1,3 @@
ipython[all] ==8.22.2

litgpt @ git+https://github.com/Lightning-AI/lit-gpt@24d5eba1724c953b7506edc041a7da1ce226c129
2 changes: 1 addition & 1 deletion requirements/test.txt
@@ -11,7 +11,7 @@ expecttest ==0.2.1 # for test_ddp.py
hypothesis ==6.99.10 # for test_ddp.py
numpy # for test_ops.py
einops # for test_einops.py
lit_gpt @ git+https://github.com/Lightning-AI/lit-gpt@f241d94df59d82b2017bfdcd3800ac8779eb45f5
litgpt @ git+https://github.com/Lightning-AI/lit-gpt@24d5eba1724c953b7506edc041a7da1ce226c129
absl-py # thunder/benchmarks/test_benchmark_litgpt.py
pandas # thunder/benchmarks/test_benchmark_litgpt.py
xlsxwriter # thunder/benchmarks/test_benchmark_litgpt.py
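Both the notebook and the test requirements now point at the renamed `litgpt` package, pinned to the same commit. Installing just that dependency as a standalone check (URL and revision taken from the diff above):

```bash
# Install the pinned litgpt revision used by requirements/test.txt and requirements/notebooks.txt.
pip install "litgpt @ git+https://github.com/Lightning-AI/lit-gpt@24d5eba1724c953b7506edc041a7da1ce226c129"
```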