From 9fc8ab74f16f957dbb74215bf616c70aeafbf13f Mon Sep 17 00:00:00 2001
From: Mike Henry <11765982+mikemhenry@users.noreply.github.com>
Date: Mon, 26 Aug 2024 14:07:59 -0700
Subject: [PATCH] Use OMSF action for AWS self-hosted runner (#744)

---
Reviewer notes (this text sits after the three-dash line, so it is not part
of the commit message or of the applied change):

  * Adds gpu-runner.yaml with three jobs: start-aws-runner, self-hosted-test
    and stop-aws-runner. It is triggered only by workflow_dispatch; the
    removed self-hosted-gpu-test.yml also ran on pushes to master/main and
    on a "0 0 * * *" cron.
  * AWS credentials now come from role-to-assume together with the
    "id-token: write" permission, replacing the aws-access-key-id /
    aws-secret-access-key secrets used by the removed workflow.
  * omsf-eco-infra/gha-runner@v0.2.0 (action: start / stop) replaces
    machulav/ec2-github-runner@main (mode: start / stop). The stop job
    consumes the "mapping" output of the start job.
  * The test job now targets the generic "self-hosted" label instead of the
    label emitted by the start job.
  * The Codecov upload step is gone; coverage is only printed
    (--cov-report=term).
  * NOTE(review): "environment-name: openfe_env" looks carried over from
    another project - confirm the name is intentional.
  * NOTE(review): the OpenEye license step needs secrets.OE_LICENSE and an
    importable "openeye" package - confirm test_env.yaml provides it.
  * NOTE(review): line breaks and YAML indentation in the hunks below were
    restored by hand; check them against blobs 46e4308c and 2994e91f before
    applying.

 .github/workflows/gpu-runner.yaml          | 107 +++++++++++++++++
 .github/workflows/self-hosted-gpu-test.yml | 130 ---------------------
 2 files changed, 107 insertions(+), 130 deletions(-)
 create mode 100644 .github/workflows/gpu-runner.yaml
 delete mode 100644 .github/workflows/self-hosted-gpu-test.yml

diff --git a/.github/workflows/gpu-runner.yaml b/.github/workflows/gpu-runner.yaml
new file mode 100644
index 00000000..46e4308c
--- /dev/null
+++ b/.github/workflows/gpu-runner.yaml
@@ -0,0 +1,107 @@
+name: Self-Hosted Runner
+on:
+  workflow_dispatch:
+
+jobs:
+  start-aws-runner:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    outputs:
+      mapping: ${{ steps.aws-start.outputs.mapping }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::009563297724:role/gha-runner-omsf
+          aws-region: us-east-1
+      - name: Create cloud runner
+        id: aws-start
+        uses: omsf-eco-infra/gha-runner@v0.2.0
+        with:
+          provider: "aws"
+          action: "start"
+          aws_image_id: ami-053912f3a44543f8c
+          aws_instance_type: g4dn.xlarge
+          aws_region_name: us-east-1
+          aws_home_dir: /home/ubuntu
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
+
+  self-hosted-test:
+    runs-on: self-hosted
+    timeout-minutes: 720 # 12 hours
+    defaults:
+      run:
+        shell: bash -leo pipefail {0}
+    env:
+      OE_LICENSE: ${{ github.workspace }}/oe_license.txt
+
+    needs:
+      - start-aws-runner
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Print disk usage
+        run: "df -h"
+
+      - name: Print Docker details
+        run: "docker version || true"
+
+      - name: Check for nvidia-smi
+        run: "nvidia-smi"
+
+      - name: "Setup Micromamba"
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: devtools/conda-envs/test_env.yaml
+          environment-name: openfe_env
+
+      - name: "Check if OpenMM can get a GPU"
+        run: python -m openmm.testInstallation
+
+      - name: "Install"
+        run: python -m pip install --no-deps -e .
+
+      - name: "Environment Information"
+        run: |
+          micromamba info
+          micromamba list
+          pip list
+
+      - name: Test OE License & Write License to File
+        env:
+          OE_LICENSE_TEXT: ${{ secrets.OE_LICENSE }}
+        run: |
+          echo "${OE_LICENSE_TEXT}" > ${OE_LICENSE}
+          python -c "import openeye; assert openeye.oechem.OEChemIsLicensed(), 'OpenEye license checks failed!'"
+
+      - name: "Run tests"
+        run: |
+          pytest -n 4 -v --durations=10 --cov=openmmtools --cov-report=term
+
+  stop-aws-runner:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+    needs:
+      - start-aws-runner
+      - self-hosted-test
+    if: ${{ always() }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::009563297724:role/gha-runner-omsf
+          aws-region: us-east-1
+      - name: Stop instances
+        uses: omsf-eco-infra/gha-runner@v0.2.0
+        with:
+          provider: "aws"
+          action: "stop"
+          instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }}
+          aws_region_name: us-east-1
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
diff --git a/.github/workflows/self-hosted-gpu-test.yml b/.github/workflows/self-hosted-gpu-test.yml
deleted file mode 100644
index 2994e91f..00000000
--- a/.github/workflows/self-hosted-gpu-test.yml
+++ /dev/null
@@ -1,130 +0,0 @@
-name: self-hosted-gpu-test
-on:
-  push:
-    branches:
-      - master
-      - main
-  workflow_dispatch:
-  schedule:
-    # nightly tests
-    - cron: "0 0 * * *"
-jobs:
-  start-runner:
-    name: Start self-hosted EC2 runner
-    runs-on: ubuntu-latest
-    outputs:
-      label: ${{ steps.start-ec2-runner.outputs.label }}
-      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-      - name: Start EC2 runner
-        id: start-ec2-runner
-        uses: machulav/ec2-github-runner@main
-        with:
-          mode: start
-          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          ec2-image-id: ami-04d16a12bbc76ff0b
-          ec2-instance-type: g4dn.xlarge
-          subnet-id: subnet-0dee8543e12afe0cd # us-east-1a
-          security-group-id: sg-0f9809618550edb98
-          # iam-role-name: self-hosted-runner # optional, requires additional permissions
-          aws-resource-tags: > # optional, requires additional permissions
-            [
-              {"Key": "Name", "Value": "ec2-github-runner"},
-              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
-            ]
-  do-the-job:
-    name: Do the job on the runner
-    needs: start-runner # required to start the main job when the runner is ready
-    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
-    env:
-      TEST_MODE: GPU
-      OPENMM: ${{ matrix.cfg.openmm }}
-      OE_LICENSE: ${{ github.workspace }}/oe_license.txt
-      HOME: /home/ec2-user
-      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
-
-    defaults:
-      run:
-        shell: bash -l {0}
-    steps:
-      - uses: actions/checkout@v3
-      - uses: conda-incubator/setup-miniconda@v2
-        with:
-          installer-url: https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
-          python-version: "3.10"
-          activate-environment: test
-          channels: conda-forge,defaults
-          environment-file: devtools/conda-envs/test_env.yaml
-          auto-activate-base: false
-          auto-update-conda: true
-          show-channel-urls: true
-
-      - name: Refine test env
-        shell: bash -l {0}
-        run: |
-          mamba install -y cudatoolkit==11.7 openmm>=8.0
-
-      - name: Additional info about the build
-        shell: bash -l {0}
-        run: |
-          uname -a
-          df -h
-          ulimit -a
-          conda info -a
-          conda list
-          python -c "import openmm; print(openmm.Platform.getPluginLoadFailures())"
-          python -m openmm.testInstallation
-
-      - name: Install package
-        shell: bash -l {0}
-        run: |
-          python -m pip install --no-deps -v .
-
-      - name: Environment Information
-        shell: bash -l {0}
-        run: |
-          conda info -a
-          conda list
-
-      - name: Test the package
-        shell: bash -l {0}
-        run: |
-          pytest -v --cov-report xml --durations=0 --cov=openmmtools openmmtools/tests
-
-      - name: Codecov
-        if: ${{ github.repository == 'choderalab/openmmtools'
-                && github.event != 'schedule' }}
-        uses: codecov/codecov-action@v1
-        with:
-          file: ./coverage.xml
-          name: codecov-${{ matrix.cfg.os }}-py${{ matrix.cfg.python-version }}
-          flags: unittests
-          fail_ci_if_error: false
-
-  stop-runner:
-    name: Stop self-hosted EC2 runner
-    needs:
-      - start-runner # required to get output from the start-runner job
-      - do-the-job # required to wait when the main job is done
-    runs-on: ubuntu-20.04
-    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-      - name: Stop EC2 runner
-        uses: machulav/ec2-github-runner@main
-        with:
-          mode: stop
-          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
-          label: ${{ needs.start-runner.outputs.label }}
-          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}