Skip to content

Commit

Permalink
Refactoring UnitTests.yml for concurrency.
Browse files Browse the repository at this point in the history
  • Loading branch information
shralex committed Dec 27, 2024
1 parent 5bba955 commit a4659d0
Show file tree
Hide file tree
Showing 19 changed files with 450 additions and 674 deletions.
113 changes: 113 additions & 0 deletions .github/workflows/RunTests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This workflow builds TPU and GPU test images, runs the unit and integration tests inside them on self-hosted runners, and then deletes the images.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Tests

# Runs on every pull request, on pushes to main, on manual dispatch, and on a schedule.
on:
  pull_request:
  push:
    branches: [ "main" ]
  workflow_dispatch:
  schedule:
    # Run the job every 4 hours
    - cron: '0 */4 * * *'

jobs:
  # Checks that gsutil is available on the runner and prunes Docker data
  # before any image is built.
  prelim:
    runs-on: ["self-hosted"]
    steps:
      - name: Test gsutil installation
        run: which gsutil >/dev/null 2>&1 || { echo >&2 "gsutil is required but not installed. Aborting"; exit 24;}
      - name: Cleanup old docker images
        run: docker system prune --all --force

  # Build and push the per-run TPU image (tagged ":tpu" by the called workflow).
  tpu_image:
    needs: prelim
    uses: ./.github/workflows/build_upload_internal.yml
    with:
      device_type: tpu
      device_name: v4-8
      build_mode: stable

  # Build and push the per-run GPU image (tagged ":gpu" by the called workflow).
  gpu_image:
    needs: prelim
    uses: ./.github/workflows/build_upload_internal.yml
    with:
      device_type: gpu
      device_name: a100-40gb-4
      build_mode: pinned

  # The four test jobs below only wait for the image of their own device type,
  # so TPU and GPU suites proceed independently of each other.
  tpu_unit_tests:
    needs: tpu_image
    uses: ./.github/workflows/run_tests_internal.yml
    with:
      device_type: tpu
      device_name: v4-8
      pytest_marker: 'not gpu_only and not integration_test'
      test_directory: 'tests'
      # Quoted: run_tests_internal.yml declares these two inputs as type string.
      xla_python_client_mem_fraction: '0.75'
      tf_force_gpu_allow_growth: 'false'
      container_resource_option: "--privileged"

  tpu_integration_tests:
    needs: tpu_image
    uses: ./.github/workflows/run_tests_internal.yml
    with:
      device_type: tpu
      device_name: v4-8
      pytest_marker: 'not gpu_only and integration_test'
      test_directory: 'tests/integration_tests'
      # Quoted: run_tests_internal.yml declares these two inputs as type string.
      xla_python_client_mem_fraction: '0.75'
      tf_force_gpu_allow_growth: 'false'
      container_resource_option: "--privileged"

  gpu_unit_tests:
    needs: gpu_image
    uses: ./.github/workflows/run_tests_internal.yml
    with:
      device_type: gpu
      device_name: a100-40gb-4
      pytest_marker: 'not tpu_only and not integration_test'
      test_directory: 'tests'
      # Quoted: run_tests_internal.yml declares these two inputs as type string.
      xla_python_client_mem_fraction: '0.65'
      tf_force_gpu_allow_growth: 'true'
      container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"

  gpu_integration_tests:
    needs: gpu_image
    uses: ./.github/workflows/run_tests_internal.yml
    with:
      device_type: gpu
      device_name: a100-40gb-4
      pytest_marker: 'not tpu_only and integration_test'
      test_directory: 'tests/integration_tests'
      # Quoted: run_tests_internal.yml declares these two inputs as type string.
      xla_python_client_mem_fraction: '0.65'
      tf_force_gpu_allow_growth: 'true'
      container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"

  # Deletes the per-run images once every test job has finished.
  clean_up:
    if: ${{ always() }}  # always execute, regardless of previous jobs or steps.
    needs: [gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
    name: "Clean up"
    runs-on: ["self-hosted"]
    steps:
      - name: Delete GPU image
        run: gcloud container images delete gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:gpu --force-delete-tags --quiet
      - name: Delete TPU image
        # Without this condition a failed GPU delete (e.g. the GPU image was
        # never pushed) would skip this step and leave the TPU image behind.
        if: ${{ always() }}
        run: gcloud container images delete gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:tpu --force-delete-tags --quiet

107 changes: 0 additions & 107 deletions .github/workflows/UnitTests.yml

This file was deleted.

47 changes: 47 additions & 0 deletions .github/workflows/build_upload_internal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file defines a reusable workflow for building and uploading an image; it is called from RunTests.yml

name: Build and Upload Image

# The only trigger is workflow_call, so this runs when another workflow
# references it through `uses:`.
on:
  workflow_call:
    inputs:
      # Used as a runner label, as DEVICE= for the build script, and as the image tag.
      device_type:
        type: string
        required: true
      # Used as a runner label and shown in the job name.
      device_name:
        type: string
        required: true
      # Forwarded to the build script as MODE=.
      build_mode:
        type: string
        required: true

jobs:
  build_and_upload:
    name: Build and upload image (${{ inputs.device_name }})
    # A runner must carry all three labels to pick up this job.
    runs-on:
      - "self-hosted"
      - "${{ inputs.device_type }}"
      - "${{ inputs.device_name }}"
    steps:
      - uses: actions/checkout@v4

      - name: Build an image
        run: |
          bash docker_build_dependency_image.sh MODE=${{ inputs.build_mode }} DEVICE=${{ inputs.device_type }}

      # The repository name embeds the run id and the tag is the device type,
      # matching the image reference used by the test and clean-up jobs.
      - name: Tag the image
        run: |
          docker tag maxtext_base_image gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:${{ inputs.device_type }}

      - name: Upload the image
        run: |
          docker push gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:${{ inputs.device_type }}
60 changes: 60 additions & 0 deletions .github/workflows/run_tests_internal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file defines a reusable workflow for running tests; it is called from RunTests.yml

name: Run Tests

# The only trigger is workflow_call, so this runs when another workflow
# references it through `uses:`.
on:
  workflow_call:
    inputs:
      # Used as a runner label and as the tag of the container image to run in.
      device_type:
        type: string
        required: true
      # Used as a runner label.
      device_name:
        type: string
        required: true
      # Passed to pytest as the -m marker expression.
      pytest_marker:
        type: string
        required: true
      # Path handed to pytest, resolved after `cd MaxText`.
      test_directory:
        type: string
        required: true
      # Exported into the container as XLA_PYTHON_CLIENT_MEM_FRACTION.
      xla_python_client_mem_fraction:
        type: string
        required: true
      # Exported into the container as TF_FORCE_GPU_ALLOW_GROWTH.
      tf_force_gpu_allow_growth:
        type: string
        required: true
      # Supplied verbatim as the container `options` string.
      container_resource_option:
        type: string
        required: true

jobs:
  run:
    # A runner must carry all three labels to pick up this job.
    runs-on:
      - "self-hosted"
      - "${{ inputs.device_type }}"
      - "${{ inputs.device_name }}"
    container:
      # Per-run image: repository name embeds the run id, tag is the device type.
      image: gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:${{ inputs.device_type }}
      options: ${{ inputs.container_resource_option }}
      env:
        XLA_PYTHON_CLIENT_MEM_FRACTION: ${{ inputs.xla_python_client_mem_fraction }}
        TF_FORCE_GPU_ALLOW_GROWTH: ${{ inputs.tf_force_gpu_allow_growth }}
      volumes:
        # NOTE(review): host path is presumably the runner's checkout of this
        # repository; confirm it matches the runner's actual work directory.
        - /home/runner/actions-runner/_work/maxtext/maxtext:/deps
    steps:
      - uses: actions/checkout@v4

      - name: Run Tests
        run: |
          cd MaxText
          python3 -m pytest ${{ inputs.test_directory }} -m "${{ inputs.pytest_marker }}"
55 changes: 0 additions & 55 deletions MaxText/tests/decode_int8_test.py

This file was deleted.

Loading

0 comments on commit a4659d0

Please sign in to comment.