From 926121cfe48a2762ae8076e00ae1b1f50593bb45 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:27:51 +0100 Subject: [PATCH 01/12] ci: testing with `torch==2.3` /test and move `2.4` /nightly (#86) --- .azure/docker-build.yml | 4 +++- .azure/gpu-tests.yml | 13 +++++++++++-- dockers/ubuntu-cuda/Dockerfile | 9 ++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.azure/docker-build.yml b/.azure/docker-build.yml index a1803b596d..dc353f689f 100644 --- a/.azure/docker-build.yml +++ b/.azure/docker-build.yml @@ -42,7 +42,9 @@ jobs: # CUDA 12.1 'cuda 12.1 | torch 2.2 | cudnn FE v1.2': {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.2.1', TRITON_VERSION: '2.2.0', CUDNN_FRONTEND: "1.2.1"} - 'cuda 12.1 | torch 2.3 /nightly | cudnn FE v1.2': + 'cuda 12.1 | torch 2.3 /test | cudnn FE v1.2': + {CUDA_VERSION: '12.1.1', TORCH_VERSION: '2.3.0', TRITON_VERSION: '2.2.0', TORCH_INSTALL: 'test', CUDNN_FRONTEND: "1.2.1"} + 'cuda 12.1 | torch 2.4 /nightly | cudnn FE v1.2': {CUDA_VERSION: '12.1.1', TORCH_VERSION: 'main', TORCH_INSTALL: 'source', CUDNN_FRONTEND: "1.2.1"} #'cuda 12.1': # this version - '8.9.5.29-1+cuda12.1' for 'libcudnn8' was not found # how much time to give 'run always even if cancelled tasks' before stopping them diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index 2671f5309f..df030eaefd 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -23,6 +23,13 @@ jobs: docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.2.1-apex' CUDA_VERSION_MM: '121' testing: 'distributed' + 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | regular': + docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex' + CUDA_VERSION_MM: '121' + 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch 2.3 | distributed': + docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_2.3.0-apex' + CUDA_VERSION_MM: '121' + testing: 'distributed' 'ubuntu22.04 | cuda 12.1 | python 3.10 | torch-nightly | regular': docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex' CUDA_VERSION_MM: '121' @@ -30,8 +37,6 @@ jobs: docker-image: 'ubuntu22.04-cuda12.1.1-cudnn-fe1.2.1-py3.10-pt_main-apex' CUDA_VERSION_MM: '121' testing: 'distributed' - # how long to run the job before automatically cancelling - timeoutInMinutes: "35" # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" pool: "lit-rtx-3090" @@ -89,6 +94,7 @@ jobs: --timeout=240 \ --random-order-seed=42 \ --durations=250 \ + --timeout=240 \ --numprocesses=9 \ --ignore=thunder/tests/distributed --ignore=thunder/tests/test_networks.py # compile coverage results @@ -98,6 +104,7 @@ jobs: ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \ --flags=gpu,pytest,regular --name="GPU-coverage" --env=linux,azure condition: ne(variables['testing'], 'distributed') + timeoutInMinutes: "30" displayName: 'Testing: regular' - bash: | @@ -117,6 +124,7 @@ jobs: ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \ --flags=gpu,pytest,networks --name="GPU-coverage" --env=linux,azure condition: ne(variables['testing'], 'distributed') + timeoutInMinutes: "15" displayName: 'Testing: networks' #- bash: | @@ -138,6 +146,7 @@ jobs: # ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \ # --flags=gpu,pytest,distributed --name="GPU-coverage" --env=linux,azure condition: eq(variables['testing'], 'distributed') + timeoutInMinutes: "20" displayName: 'Testing: distributed' # todo (mruberry): decide whether this should be here or in another workflow diff --git a/dockers/ubuntu-cuda/Dockerfile b/dockers/ubuntu-cuda/Dockerfile index 5fd03bc134..e37e29c216 100644 --- a/dockers/ubuntu-cuda/Dockerfile +++ b/dockers/ubuntu-cuda/Dockerfile @@ -138,7 +138,14 @@ RUN \ cd .. && \ rm -rf Fuser ; \ elif [ "${TORCH_INSTALL}" == "test" ]; then \ - echo "Not supported option" ; \ + # building nvFuser from source + git clone https://github.com/NVIDIA/Fuser.git && \ + cd Fuser && \ + git submodule update --init --recursive && \ + pip install -r requirements.txt && \ + python setup.py install --no-test --no-benchmark && \ + cd .. && \ + rm -rf Fuser ; \ else \ # installing pytorch from wheels \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ From b7bed7bda0c075edd52df652fdc8afa33ec44d43 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Sat, 30 Mar 2024 11:47:13 +0100 Subject: [PATCH 02/12] update codeowners for ci/cd (#89) --- .github/CODEOWNERS | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8be0722378..11c973b123 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,5 +5,8 @@ * @mruberry @lantiga @robieta @t-vi @carmocca # CI/CD and configs -/.github/ @mruberry @lantiga @t-vi @carmocca -*.yml @mruberry @lantiga @t-vi @carmocca +/.azure/ @borda @lantiga @t-vi @carmocca +/.github/ @borda @lantiga @t-vi @carmocca +/dockers/ @borda @lantiga @t-vi @carmocca +Makefile @borda @lantiga @t-vi @carmocca +*.yml @borda @lantiga @t-vi @carmocca From 9e0f8cf66508b1a9f0e71b45c56345f295046ab8 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 1 Apr 2024 10:35:18 +0200 Subject: [PATCH 03/12] Update dependabot - without labels --- .github/dependabot.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f609753245..d49564db45 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,15 +7,9 @@ updates: - package-ecosystem: "pip" # Look for a `requirements` in the `root` directory directory: "/" - # Check for updates once a week schedule: interval: "monthly" - # Labels on pull requests for version updates only - labels: - - "ci / tests" pull-request-branch-name: - # Separate sections of the branch name with a hyphen - # for example, `dependabot-npm_and_yarn-next_js-acorn-6.4.1` separator: "-" # Allow up to 5 open pull requests for pip dependencies open-pull-requests-limit: 5 @@ -25,15 +19,9 @@ updates: # Enable version updates for GitHub Actions - package-ecosystem: "github-actions" directory: "/" - # Check for updates once a week schedule: interval: "monthly" - # Labels on pull requests for version updates only - labels: - - "ci / tests" pull-request-branch-name: - # Separate sections of the branch name with a hyphen - # for example, `dependabot-npm_and_yarn-next_js-acorn-6.4.1` separator: "-" # Allow up to 5 open pull requests for GitHub Actions open-pull-requests-limit: 5 From 2a90807a713e3f2eb7a7b094b67578e698c67950 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:16:29 +0200 Subject: [PATCH 04/12] Bump pytest from 8.0.2 to 8.1.1 (#108) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 36afeba5f5..3bdb89e1ab 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ coverage ==7.4.3 -pytest ==8.0.2 +pytest ==8.1.1 pytest-timeout ==2.2.0 pytest-cov ==4.1.0 pytest-xdist ==3.5.0 From ebf9fd907d1cba77cd3bbea35143b2022b94164e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:18:43 +0200 Subject: [PATCH 05/12] Bump ipython[all] from 8.22.2 to 8.23.0 (#110) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/docs.txt | 2 +- requirements/notebooks.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 69547efb64..64b473ccd8 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,7 +1,7 @@ sphinx ==5.3.0 myst-parser ==1.0.0 nbsphinx ==0.9.3 -ipython[all] ==8.22.2 +ipython[all] ==8.23.0 pandoc ==2.3 docutils >=0.16 sphinxcontrib-fulltoc ==1.2.0 diff --git a/requirements/notebooks.txt b/requirements/notebooks.txt index c60affa8b3..51bd98214e 100644 --- a/requirements/notebooks.txt +++ b/requirements/notebooks.txt @@ -1,3 +1,3 @@ -ipython[all] ==8.22.2 +ipython[all] ==8.23.0 litgpt @ git+https://github.com/Lightning-AI/lit-gpt@940ffc96f7214bca24aa77479bc7c33900aaef28 From adc29e526e85f42b38977262a95521ef0866b1f0 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:19:22 +0200 Subject: [PATCH 06/12] ci/cd: publishing nightly packages with `devYYYYMMMDD` (#105) --- .github/workflows/release-nightly.yml | 47 +++++++++++++++++++++++++++ .github/workflows/release-pypi.yml | 3 +- setup.py | 35 ++++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/release-nightly.yml diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml new file mode 100644 index 0000000000..08a0136694 --- /dev/null +++ b/.github/workflows/release-nightly.yml @@ -0,0 +1,47 @@ +name: Nightly packages + +on: + pull_request: # this shall test only the part of workflow before publishing + branches: [main, "release/*"] + types: [opened, reopened, ready_for_review, synchronize] + paths: + - ".github/workflows/release-nightly.yml" + schedule: + - cron: "0 0 * * 0" # on Sundays + workflow_dispatch: {} + +defaults: + run: + shell: bash + +jobs: + releasing-nightly: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install dependencies + run: python -m pip install --user --upgrade setuptools wheel + - name: Build + env: + CONVERT_VERSION2NIGHTLY: "1" + run: python setup.py sdist bdist_wheel + + # We do this, since failures on test.pypi aren't that bad + - name: Publish to Test PyPI + if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' + uses: pypa/gh-action-pypi-publish@v1.8.14 + with: + user: __token__ + password: ${{ secrets.test_pypi_password }} + repository_url: https://test.pypi.org/legacy/ + + - name: Publish distribution 📦 to PyPI + if: startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' + uses: pypa/gh-action-pypi-publish@v1.8.14 + with: + user: __token__ + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 078f9e6066..cfcf1409eb 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -10,9 +10,8 @@ on: # Trigger the workflow on push or pull request, but only for the main branc # based on https://github.com/pypa/gh-action-pypi-publish jobs: - build: + releasing-pypi: runs-on: ubuntu-22.04 - steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/setup.py b/setup.py index b0ee14e897..817caa05da 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import glob import os +import re from importlib.util import module_from_spec, spec_from_file_location from pathlib import Path @@ -10,6 +11,8 @@ _PATH_ROOT = os.path.dirname(__file__) _PATH_REQUIRES = os.path.join(_PATH_ROOT, "requirements") +# check if os env. variable is set to convert version to nightly +_CONVERT_VERSION = int(os.environ.get("CONVERT_VERSION2NIGHTLY", 0)) def _load_py_module(fname, pkg="thunder"): @@ -19,6 +22,35 @@ def _load_py_module(fname, pkg="thunder"): return py +def convert_version2nightly(about_file: str = "thunder/__about__.py") -> None: + """Load the actual version and convert it to the nightly version.""" + from datetime import datetime + + # load the about file + with open(about_file) as fo: + lines = fo.readlines() + idx = None + # find the line with version + for i, ln in enumerate(lines): + if ln.startswith("__version__"): + idx = i + break + if idx is None: + raise ValueError("The version is not found in the `__about__.py` file.") + # parse the version from variable assignment + version = lines[idx].split("=")[1].strip().strip('"') + # parse X.Y.Z version and prune any suffix + vers = re.match(r"(\d+)\.(\d+)\.(\d+).*", version) + # create timestamp YYYYMMDD + timestamp = datetime.now().strftime("%Y%m%d") + version = f"{'.'.join(vers.groups())}.dev{timestamp}" + # print the new version + lines[idx] = f'__version__ = "{version}"\n' + # dump updated lines + with open(about_file, "w") as fo: + fo.writelines(lines) + + def _load_requirements(path_dir: str, file_name: str = "requirements.txt") -> list: reqs = parse_requirements(open(os.path.join(path_dir, file_name)).readlines()) return [r for r in list(map(str, reqs)) if "@" not in r] @@ -56,6 +88,9 @@ def _load_readme_description(path_dir: str, homepage: str, version: str) -> str: return text +if _CONVERT_VERSION: + convert_version2nightly() + about = _load_py_module("__about__.py") # https://packaging.python.org/discussions/install-requires-vs-requirements / From 6adaa2637ccf3b06e9e7212afbb48d5d0eb0dd9c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 11:20:24 +0200 Subject: [PATCH 07/12] Bump pytest-timestamper from 0.0.9 to 0.0.10 (#111) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 3bdb89e1ab..762070767f 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -4,7 +4,7 @@ pytest-timeout ==2.2.0 pytest-cov ==4.1.0 pytest-xdist ==3.5.0 pytest-random-order ==1.1.1 -pytest-timestamper ==0.0.9 +pytest-timestamper ==0.0.10 graphviz ==0.20.1 fdm ==0.4.1 expecttest ==0.2.1 # for test_ddp.py From 813560c290119fe5ba3a48a43570b1e6b0c4e91e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 10:59:33 +0000 Subject: [PATCH 08/12] Bump codecov/codecov-action from 3 to 4 (#113) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 310279fbbd..492a1754d6 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -115,7 +115,7 @@ jobs: coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml From b5dae0671f63ffb03fafe9e7414b5d70420328a7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:44:51 +0200 Subject: [PATCH 09/12] Bump hypothesis from 6.99.10 to 6.100.0 (#107) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 762070767f..87bd2b0105 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,7 +8,7 @@ pytest-timestamper ==0.0.10 graphviz ==0.20.1 fdm ==0.4.1 expecttest ==0.2.1 # for test_ddp.py -hypothesis ==6.99.10 # for test_ddp.py +hypothesis ==6.100.0 # for test_ddp.py numpy # for test_ops.py einops # for test_einops.py litgpt @ git+https://github.com/Lightning-AI/lit-gpt@940ffc96f7214bca24aa77479bc7c33900aaef28 From 6823125a32dc018f2877e72393dd9a607adc5419 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:46:34 +0200 Subject: [PATCH 10/12] bump pkg version to `0.2` (#106) --- thunder/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thunder/__about__.py b/thunder/__about__.py index 15e838ef4f..4e812d79f6 100644 --- a/thunder/__about__.py +++ b/thunder/__about__.py @@ -1,4 +1,4 @@ -__version__ = "0.1.0" +__version__ = "0.2.0dev" __author__ = "Lightning-AI et al" __author_email__ = "community@lightning.ai" __license__ = "Apache 2.0" From a62a00d8b7bafe8fde6104b21317e065234dca8a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 14:00:48 +0200 Subject: [PATCH 11/12] Bump Lightning-AI/utilities from 0.10.1 to 0.11.2 (#112) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- .github/workflows/ci-checks.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-checks.yml b/.github/workflows/ci-checks.yml index 37fff68466..ada9976bdd 100644 --- a/.github/workflows/ci-checks.yml +++ b/.github/workflows/ci-checks.yml @@ -11,19 +11,19 @@ concurrency: jobs: precommit-run: - uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@v0.10.1 + uses: Lightning-AI/utilities/.github/workflows/check-precommit.yml@v0.11.2 with: python-version: "3.10" check-schema: - uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.0 + uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.2 with: azure-dir: ".azure" check-package: - uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.0 + uses: Lightning-AI/utilities/.github/workflows/check-package.yml@v0.11.2 with: - actions-ref: v0.11.0 + actions-ref: v0.11.2 import-name: "thunder" artifact-name: dist-packages-${{ github.sha }} testing-matrix: | From 56bc01b8e17ba9551395a23c639fae79bc9c25b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 14:02:57 +0200 Subject: [PATCH 12/12] Bump coverage from 7.4.3 to 7.4.4 (#109) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index 87bd2b0105..32501c9667 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,4 +1,4 @@ -coverage ==7.4.3 +coverage ==7.4.4 pytest ==8.1.1 pytest-timeout ==2.2.0 pytest-cov ==4.1.0