# Workflow file for the GitHub Actions run "Test PR #52" (Test PR).

# Builds a leaderboard entry from a pull request when someone comments
#
#   /add-to-leaderboard --python <version> --appworld <version> \
#       --experiment-prefix <prefix> [--replace-last]
#
# on that pull request. The job checks out the PR, verifies that it changes
# exactly the two expected leaderboard bundles, unpacks and evaluates them
# with the requested appworld release, and posts the resulting entry back
# to the PR as a comment.
#
# NOTE(review): any user who can comment on a PR can trigger this job.
# Consider gating on github.event.comment.author_association and declaring
# an explicit least-privilege `permissions:` block (the final step needs to
# create an issue comment) - confirm the intended policy.
name: New leaderboard entry

on:
  # `issue_comment` also fires for comments on pull requests. It does not
  # support a `branches` filter, so none is declared here.
  issue_comment:
    types: [created]

jobs:
  add-new-entry:
    # Only react to PR comments (not plain issues) that start with the command.
    if: >-
      ${{ github.event.issue.pull_request &&
      startsWith(github.event.comment.body, '/add-to-leaderboard') }}
    runs-on: ubuntu-latest
    steps:
      - name: Extract arguments from comment
        id: extract_args
        env:
          # The comment body is untrusted. It is handed to the script as an
          # environment variable instead of being interpolated into the
          # script text, which would allow shell injection.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          echo "Extracting arguments..."

          # Print the first value following "<flag> " in the comment, or
          # nothing when the flag is absent (never fails the step by itself).
          extract_arg() {
            printf '%s\n' "$COMMENT_BODY" \
              | grep -oP "(?<=$1 )\S+" \
              | head -n 1 || true
          }

          python_version=$(extract_arg '--python')
          appworld_version=$(extract_arg '--appworld')
          experiment_prefix=$(extract_arg '--experiment-prefix')
          if printf '%s\n' "$COMMENT_BODY" | grep -q -- '--replace-last'; then
            replace_last_flag="--replace-last"
          else
            replace_last_flag=""
          fi

          # These values later become command-line arguments and path
          # components, so require them to be present and restrict them to
          # a conservative character set (no leading '-', no '/').
          valid_value='^[A-Za-z0-9][A-Za-z0-9._+-]*$'
          for name in python_version appworld_version experiment_prefix; do
            value="${!name}"
            if [[ ! "$value" =~ $valid_value ]]; then
              echo "::error::Missing or invalid value for ${name}: '${value}'"
              exit 1
            fi
          done

          echo "Python version: $python_version"
          echo "Appworld version: $appworld_version"
          # Use the local shell variable here: values written to $GITHUB_ENV
          # only become visible in later steps.
          echo "Experiment names: ${experiment_prefix}_test_normal and ${experiment_prefix}_test_challenge"
          echo "Replace last flag: $replace_last_flag"

          # Export for the following steps (available there as $python_version,
          # $appworld_version, $experiment_prefix and $replace_last_flag).
          {
            echo "python_version=$python_version"
            echo "appworld_version=$appworld_version"
            echo "experiment_prefix=$experiment_prefix"
            echo "replace_last_flag=$replace_last_flag"
          } >> "$GITHUB_ENV"

      - name: Checkout PR branch
        uses: actions/checkout@v4
        with:
          # refs/pull/<n>/head exists in the base repository for every PR,
          # including PRs opened from forks, whose head branch name does not
          # exist in this repository.
          ref: refs/pull/${{ github.event.issue.number }}/head
          # Full history so the merge base with main can be computed below.
          fetch-depth: 0

      - name: Pull LFS files
        run: |
          echo "Fetching and checking out LFS files..."
          git lfs install
          git lfs pull

      - name: Cat files
        # Debug output of the submitted challenge bundle for this experiment.
        run: cat "experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle"

      - uses: astral-sh/setup-uv@v3
        with:
          version: "0.4.4"

      - name: Set up Python
        run: uv python install "$python_version"

      - name: Install venv
        run: uv venv

      - name: Install dependencies
        run: |
          uv pip install "appworld==${appworld_version}"
          uv run appworld install

      - name: Download appworld data
        run: uv run appworld download data

      - name: Fetch main branch
        run: git fetch origin main

      - name: Verify PR file changes
        run: |
          echo "Checking PR for exactly two new files..."
          expected_files=(
            "experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle"
            "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle"
          )
          # Three-dot diff: only what the PR changed relative to its merge
          # base with main, so commits added to main after the PR branched
          # off are not reported as PR changes.
          new_files=$(git diff --name-only origin/main...HEAD)
          echo "Expected files:"
          printf "%s\n" "${expected_files[@]}"
          echo "New files in the PR:"
          echo "$new_files"
          # Sort both lists the same way (byte order) before comparing.
          expected_sorted=$(printf "%s\n" "${expected_files[@]}" | LC_ALL=C sort)
          actual_sorted=$(printf "%s\n" "$new_files" | LC_ALL=C sort)
          if [[ "$expected_sorted" != "$actual_sorted" ]]; then
            echo "Error: File list does not match the expected files."
            echo "Expected:"
            echo "$expected_sorted"
            echo "Actual:"
            echo "$actual_sorted"
            exit 1
          fi
          echo "PR file check passed. The file list matches exactly."

      - name: Unpack experiments
        run: |
          uv run appworld unpack "${experiment_prefix}_test_normal"
          uv run appworld unpack "${experiment_prefix}_test_challenge"

      - name: Run evaluations
        run: |
          uv run appworld evaluate "${experiment_prefix}_test_normal" test_normal
          uv run appworld evaluate "${experiment_prefix}_test_challenge" test_challenge

      - name: Make and add leaderboard entry
        # The flag is only appended when it was requested, so no empty
        # argument is passed otherwise.
        run: >-
          uv run appworld make
          "${experiment_prefix}_test_normal"
          "${experiment_prefix}_test_challenge"
          ${replace_last_flag:+--replace-last}

      - name: Comment with leaderboard entry
        if: ${{ success() }}
        uses: actions/github-script@v7
        with:
          # Posts the last element of leaderboard.json (read from the
          # workspace root) as a fenced JSON block on the PR.
          script: |
            const fs = require('fs');
            const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
            const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```';
            const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`;
            const issue_number = context.issue.number;
            await github.rest.issues.createComment({
              ...context.repo,
              issue_number: issue_number,
              body: commentBody,
            });