# Workflow file for the run "Test PR" (Test PR #79)

# Runs when a comment starting with `/add-to-leaderboard` is created on a
# pull request. It checks out the repository with Git LFS objects and prints
# the on-disk size of one leaderboard bundle.
name: New leaderboard entry
on:
  issue_comment:
    types: [created]
jobs:
  add-new-entry:
    # `issue_comment` fires for issues and pull requests alike;
    # `github.event.issue.pull_request` is only set for PR comments.
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/add-to-leaderboard') }}
    runs-on: ubuntu-latest
    steps:
      # - name: Install LFS
      #   run: git lfs install
      # NOTE(review): for `issue_comment` events the workflow runs against the
      # default branch, so without an explicit `ref` this checks out the
      # default branch rather than the PR head - confirm that is intended.
      - name: Checkout code
        # v4 replaces v2, whose Node 12 runtime is no longer supported.
        uses: actions/checkout@v4
        with:
          # Download the real LFS objects instead of pointer files.
          lfs: true
          # fetch-depth: 0
      # - name: Pull and checkout LFS content
      #   run: |
      #     git lfs fetch
      #     git lfs checkout
      - name: Show LFS file size
        run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# Everything below is a commented-out variant of this workflow (not executed).
# name: New leaderboard entry
# on:
# issue_comment:
# types: [created]
# branches:
# - main
# jobs:
# add-new-entry:
# if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/add-to-leaderboard') }}
# runs-on: ubuntu-latest
# steps:
# # - name: Extract arguments from comment
# # id: extract_args
# # run: |
# # echo "Extracting arguments..."
# # comment="${{ github.event.comment.body }}"
# # python_version=$(echo "$comment" | grep -oP '(?<=--python )\S+')
# # appworld_version=$(echo "$comment" | grep -oP '(?<=--appworld )\S+')
# # experiment_prefix=$(echo "$comment" | grep -oP '(?<=--experiment-prefix )\S+')
# # replace_last_flag=$(echo "$comment" | grep -q -- '--replace-last' && echo "--replace-last" || echo "")
# # echo "Python version: $python_version"
# # echo "Appworld version: $appworld_version"
# # echo "Experiment names: ${experiment_prefix}_test_normal and ${experiment_prefix}_test_challenge"
# # echo "Replace last flag: $replace_last_flag"
# # echo "python_version=$python_version" >> $GITHUB_ENV
# # echo "appworld_version=$appworld_version" >> $GITHUB_ENV
# # echo "experiment_prefix=$experiment_prefix" >> $GITHUB_ENV
# # echo "replace_last_flag=$replace_last_flag" >> $GITHUB_ENV
# # - name: Obtain PR branch
# # id: get-branch
# # run: echo "branch=$(gh pr view $PR_NO --repo $REPO --json headRefName --jq '.headRefName')" >> $GITHUB_OUTPUT
# # env:
# # REPO: ${{ github.repository }}
# # PR_NO: ${{ github.event.issue.number }}
# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# # - name: Checkout PR branch
# # uses: actions/checkout@v4
# # with:
# # ref: ${{ steps.get-branch.outputs.branch }}
# # lfs: true
# # - name: Checkout PR branch
# # uses: actions/checkout@v2
# # with:
# # lfs: true
# # # with:
# # # ref: ${{ steps.get-branch.outputs.branch }}
# # # run: git lfs fetch --all
# # - name: Pull LFS files
# # run: |
# # git lfs fetch --all
# # git lfs pull
# # git lfs checkout
# # - name: Checkout LFS objects
# # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# # - name: checkout engine without LFS
# # uses: actions/checkout@v2
# # with:
# # lfs: false
# # - name: apply_lfs_filters
# # run: |
# # git config --local lfs.fetchexclude '**/Windows/**'
# # - name: checkout engine with LFS
# # uses: actions/checkout@v2
# # with:
# # lfs: true
# # - name: Show LFS
# # run: git lfs pull
# # - name: Check out LFS objects
# # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# - name: checkout
# uses: actions/checkout@v2
# with:
# lfs: 'true'
# - name: checkoutLFS
# uses: actions/checkout@v2
# - run: git lfs pull
# - name: Show LFS
# run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# # - uses: astral-sh/setup-uv@v3
# # with:
# # version: "0.4.4"
# # - name: Set up Python
# # run: uv python install ${{ env.python_version }}
# # - name: Install venv
# # run: uv venv
# # - name: Install dependencies
# # run: |
# # uv pip install appworld==${{ env.appworld_version }}
# # uv run appworld install
# # - name: Download appworld data
# # run: uv run appworld download data
# # - name: Fetch main branch
# # run: git fetch origin main
# # - name: Verify PR file changes
# # run: |
# # echo "Checking PR for exactly two new files..."
# # experiment_prefix="${{ env.experiment_prefix }}"
# # expected_files=("experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle" "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle")
# # new_files=$(git diff --name-only origin/main..HEAD)
# # echo "Expected files:"
# # printf "%s\n" "${expected_files[@]}"
# # echo "New files in the PR:"
# # echo "$new_files"
# # # Sort and compare file lists
# # expected_sorted=$(printf "%s\n" "${expected_files[@]}")
# # actual_sorted=$(echo "$new_files" | sort)
# # if [[ "$expected_sorted" != "$actual_sorted" ]]; then
# # echo "Error: File list does not match the expected files."
# # echo "Expected:"
# # echo "$expected_sorted"
# # echo "Actual:"
# # echo "$actual_sorted"
# # exit 1
# # fi
# # echo "PR file check passed. The file list matches exactly."
# # - name: Unpack experiments
# # run: |
# # uv run appworld unpack ${{ env.experiment_prefix }}_test_normal
# # uv run appworld unpack ${{ env.experiment_prefix }}_test_challenge
# # - name: Run evaluations
# # run: |
# # uv run appworld evaluate ${{ env.experiment_prefix }}_test_normal test_normal
# # uv run appworld evaluate ${{ env.experiment_prefix }}_test_challenge test_challenge
# # - name: Make and add leaderboard entry
# # run: uv run appworld make ${{ env.experiment_prefix }}_test_normal ${{ env.experiment_prefix }}_test_challenge ${{env.replace_last_flag}}
# # - name: Comment with leaderboard entry
# # if: ${{ success() }}
# # uses: actions/github-script@v6
# # with:
# # script: |
# # const fs = require('fs');
# # const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
# # const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```';
# # const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`;
# # const issue_number = context.issue.number;
# # await github.rest.issues.createComment({
# # ...context.repo,
# # issue_number: issue_number,
# # body: commentBody,
# # });