diff --git a/.github/workflows/on-check-failure.yml b/.github/workflows/on-check-failure.yml index 9b6338e78..b6d5f983d 100644 --- a/.github/workflows/on-check-failure.yml +++ b/.github/workflows/on-check-failure.yml @@ -80,15 +80,17 @@ jobs: contents: write pull-requests: write actions: read - uses: ./.github/workflows/reusable-autofix.yml + uses: ./.github/workflows/reusable-check-fixer.yml with: pr_number: ${{ fromJson(needs.should-run.outputs.pr_number) }} failed_workflow: ${{ needs.should-run.outputs.failed_workflow }} failed_run_id: ${{ needs.should-run.outputs.failed_run_id }} bot_username: ${{ vars.EGG_BOT_USERNAME }} branch_prefix: ${{ vars.EGG_BRANCH_PREFIX }} + config_file: "shared/check-fixers.yml" + prompt_script: "action/build-check-fixer-prompt.sh" # action_ref: jwbron/egg/action@main # Cannot be passed dynamically; hardcoded in reusable workflow - timeout: "60" + timeout: "15" secrets: BOT_APP_ID: ${{ secrets.BOT_APP_ID }} BOT_APP_PRIVATE_KEY: ${{ secrets.BOT_APP_PRIVATE_KEY }} diff --git a/.github/workflows/reusable-autofix.yml b/.github/workflows/reusable-autofix.yml index b4fd8b8f3..a7c7b9afc 100644 --- a/.github/workflows/reusable-autofix.yml +++ b/.github/workflows/reusable-autofix.yml @@ -1,3 +1,8 @@ +# DEPRECATED: Use reusable-check-fixer.yml instead. +# This workflow is kept for external repos that still reference it. +# It runs a monolithic LLM agent that investigates ALL failures. +# reusable-check-fixer.yml provides per-check fixing with non-LLM +# mechanical fixes, retry tracking, and escalation. 
name: Reusable Autofix Workflow on: diff --git a/.github/workflows/reusable-check-fixer.yml b/.github/workflows/reusable-check-fixer.yml new file mode 100644 index 000000000..4c5f3a909 --- /dev/null +++ b/.github/workflows/reusable-check-fixer.yml @@ -0,0 +1,568 @@ +name: Reusable Check Fixer Workflow + +on: + workflow_call: + inputs: + pr_number: + description: 'PR number to fix checks for' + required: true + type: number + failed_workflow: + description: 'Name of the failed workflow' + required: false + type: string + default: "manual" + failed_run_id: + description: 'Run ID of the failed workflow' + required: false + type: string + default: "" + bot_username: + description: 'GitHub username of the bot (e.g., my-bot)' + required: true + type: string + branch_prefix: + description: 'Branch prefix for bot branches (e.g., egg)' + required: true + type: string + action_ref: + description: 'Reference to egg action (owner/repo/path@ref format). NOTE: GitHub Actions uses: field cannot be dynamic - consuming repos must hardcode their action reference in the uses step below.' 
+ required: false + type: string + default: "jwbron/egg/action@main" + config_file: + description: 'Path to check-fixers.yml config (relative to repo root)' + required: false + type: string + default: "shared/check-fixers.yml" + prompt_script: + description: 'Path to prompt builder script (relative to repo root)' + required: false + type: string + default: "action/build-check-fixer-prompt.sh" + timeout: + description: 'Timeout in minutes' + required: false + type: string + default: "15" + secrets: + BOT_APP_ID: + required: true + BOT_APP_PRIVATE_KEY: + required: true + BOT_APP_INSTALLATION_ID: + required: true + ANTHROPIC_OAUTH_TOKEN: + required: true + workflow_dispatch: + inputs: + pr_number: + description: 'PR number to fix checks for' + required: true + type: number + failed_workflow: + description: 'Name of the failed workflow (Lint, Test)' + required: false + type: string + default: "manual" + failed_run_id: + description: 'Run ID of the failed workflow' + required: false + type: string + default: "" + bot_username: + description: 'GitHub username of the bot (e.g., my-bot)' + required: true + type: string + branch_prefix: + description: 'Branch prefix for bot branches (e.g., egg)' + required: true + type: string + config_file: + description: 'Path to check-fixers.yml config (relative to repo root)' + required: false + type: string + default: "shared/check-fixers.yml" + prompt_script: + description: 'Path to prompt builder script (relative to repo root)' + required: false + type: string + default: "action/build-check-fixer-prompt.sh" + timeout: + description: 'Timeout in minutes' + required: false + type: string + default: "15" + +jobs: + check-fixer: + name: Fix PR Checks + runs-on: ubuntu-latest + + permissions: + contents: write + pull-requests: write + actions: read + + # Serialize fixers per PR — do NOT cancel in-progress (avoid branch conflicts) + concurrency: + group: egg-autofix-${{ inputs.pr_number }} + cancel-in-progress: false + + env: + PR_NUMBER: 
${{ inputs.pr_number }} + FAILED_WORKFLOW: ${{ inputs.failed_workflow }} + FAILED_RUN_ID: ${{ inputs.failed_run_id }} + EGG_BOT_USERNAME: ${{ inputs.bot_username }} + BRANCH_PREFIX: ${{ inputs.branch_prefix }} + + steps: + - name: Generate bot token + id: bot-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.BOT_APP_ID }} + private-key: ${{ secrets.BOT_APP_PRIVATE_KEY }} + + - name: Fetch PR metadata + id: pr-meta + run: | + pr_json=$(gh api "repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}") + { + echo "head-ref=$(echo "$pr_json" | jq -r '.head.ref')" + echo "head-repo=$(echo "$pr_json" | jq -r '.head.repo.full_name')" + echo "title=$(echo "$pr_json" | jq -r '.title')" + } >> "$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Skip if PR has [skip-autofix] in title + - name: Check for skip marker + id: skip-check + env: + PR_TITLE: ${{ steps.pr-meta.outputs.title }} + run: | + if [[ "$PR_TITLE" == *"[skip-autofix]"* ]]; then + echo "skip=true" >> "$GITHUB_OUTPUT" + echo "PR has [skip-autofix] marker, skipping" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Early exit if skipped + if: steps.skip-check.outputs.skip == 'true' + run: echo "Skipping autofix due to [skip-autofix] marker" + + # Identify which jobs failed in the triggering workflow run + - name: Identify failed jobs + if: steps.skip-check.outputs.skip != 'true' + id: failed-jobs + run: | + if [[ -z "$FAILED_RUN_ID" || "$FAILED_WORKFLOW" == "manual" ]]; then + # Manual trigger — no specific run to query + echo "jobs=[]" >> "$GITHUB_OUTPUT" + echo "job-count=0" >> "$GITHUB_OUTPUT" + echo "No specific run ID, will build generic prompt" + else + # Query the workflow run's jobs for failures + failed=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${FAILED_RUN_ID}/jobs" \ + --jq '[.jobs[] | select(.conclusion == "failure") | select(.name | test("Aggregate|aggregate") | not) | .name] | unique') + 
echo "jobs=${failed}" >> "$GITHUB_OUTPUT" + + count=$(echo "$failed" | jq 'length') + echo "job-count=${count}" >> "$GITHUB_OUTPUT" + echo "Failed jobs: ${failed}" + fi + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Read autofix state from PR comment + - name: Read autofix state + if: steps.skip-check.outputs.skip != 'true' + id: state + run: | + # Find PR comment with egg-autofix-state marker + state_comment=$(gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + --jq '[.[] | select(.body | contains(""))] | last // empty') + + if [[ -n "$state_comment" ]]; then + comment_id=$(echo "$state_comment" | jq -r '.id') + # Extract JSON from the comment body (between ```json and ```) + # shellcheck disable=SC2016 # backticks are markdown fences, not command substitutions + state_json=$(echo "$state_comment" | jq -r '.body' | sed -n '/```json/,/```/p' | sed '1d;$d') + echo "comment-id=${comment_id}" >> "$GITHUB_OUTPUT" + echo "state=${state_json}" >> "$GITHUB_OUTPUT" + echo "Found existing state: ${state_json}" + else + echo "comment-id=" >> "$GITHUB_OUTPUT" + echo "state={}" >> "$GITHUB_OUTPUT" + echo "No existing autofix state found" + fi + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Minimize previous status comments + - name: Minimize previous autofix comments + if: steps.skip-check.outputs.skip != 'true' + run: | + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \ + --jq ".[] | select(.user.login == \"$EGG_BOT_USERNAME\" or .user.login == \"${EGG_BOT_USERNAME}[bot]\") | select(.body | contains(\"\")) | .node_id" \ + | while read -r node_id; do + # shellcheck disable=SC2016 # $id is a GraphQL variable, not bash + gh api graphql -f query=' + mutation($id: ID!) 
{ + minimizeComment(input: {subjectId: $id, classifier: OUTDATED}) { + minimizedComment { isMinimized } + } + } + ' -f id="$node_id" || true + done + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Post acknowledgment + - name: Post starting comment + if: steps.skip-check.outputs.skip != 'true' + run: | + if [[ -n "$FAILED_RUN_ID" && "$FAILED_WORKFLOW" != "manual" ]]; then + RUN_LINK="[${FAILED_WORKFLOW}](${SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${FAILED_RUN_ID})" + JOB_LIST="" + if [[ "$FAILED_JOBS_JSON" != "[]" ]]; then + JOB_LIST=$(echo "$FAILED_JOBS_JSON" | jq -r '.[] | "- " + .') + JOB_LIST=$'\n'"${JOB_LIST}" + fi + BODY=" + egg is investigating the ${RUN_LINK} check failure... + ${JOB_LIST}" + else + BODY=" + egg is investigating check failures for PR #${PR_NUMBER}..." + fi + # Strip leading whitespace from each line (caused by YAML indentation) + # shellcheck disable=SC2001 # sed is needed for regex-based multiline substitution + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + FAILED_JOBS_JSON: ${{ steps.failed-jobs.outputs.jobs }} + SERVER_URL: ${{ github.server_url }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # SECURITY: Build the fixer prompt from a trusted checkout (main). 
+ - name: Checkout main (trusted) + if: steps.skip-check.outputs.skip != 'true' + uses: actions/checkout@v4 + with: + ref: main + persist-credentials: false + + # Build fix plan: prompt, non-LLM fixes, retry state + - name: Build fix plan + if: steps.skip-check.outputs.skip != 'true' + id: plan + run: bash ${{ inputs.prompt_script }} + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + PR_NUMBER: ${{ env.PR_NUMBER }} + FAILED_WORKFLOW: ${{ env.FAILED_WORKFLOW }} + FAILED_RUN_ID: ${{ env.FAILED_RUN_ID }} + FAILED_JOBS: ${{ steps.failed-jobs.outputs.jobs }} + AUTOFIX_STATE: ${{ steps.state.outputs.state }} + CONFIG_FILE: ${{ inputs.config_file }} + + # Check if max retries reached — escalate + - name: Check max retries and escalate + if: steps.skip-check.outputs.skip != 'true' && steps.plan.outputs.max-retries-reached == 'true' + id: escalation + run: | + RUN_URL="${SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${FAILED_RUN_ID}" + + # Build escalation table + TABLE="| Check | Attempts | Logs |" + TABLE="${TABLE}"$'\n'"| ----- | -------- | ---- |" + while IFS= read -r row; do + job=$(echo "$row" | jq -r '.job') + attempts=$(echo "$row" | jq -r '.attempts') + max=$(echo "$row" | jq -r '.max') + TABLE="${TABLE}"$'\n'"| ${FAILED_WORKFLOW} / ${job} | ${attempts}/${max} | [View](${RUN_URL}) |" + done < <(echo "$ESCALATION" | jq -c '.[]') + + # Add context if LLM fixer is still running for other checks + STILL_RUNNING="" + if [[ "$NEEDS_LLM" == "true" ]]; then + STILL_RUNNING=$'\n'"_Other failing checks are still being addressed by the autofixer._"$'\n' + fi + + BODY=" + ## Autofix: Human Input Needed + + The following checks could not be fixed automatically after multiple attempts: + + ${TABLE} + ${STILL_RUNNING} + Please investigate manually or push a fix. 
+ + — Authored by egg" + + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + ESCALATION: ${{ steps.plan.outputs.escalation-details }} + NEEDS_LLM: ${{ steps.plan.outputs.needs-llm }} + SERVER_URL: ${{ github.server_url }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Run non-LLM fixes (first attempt only for applicable checks) + - name: Checkout PR branch for non-LLM fixes + if: steps.skip-check.outputs.skip != 'true' && steps.plan.outputs.has-non-llm-fixes == 'true' + uses: actions/checkout@v4 + with: + repository: ${{ steps.pr-meta.outputs.head-repo }} + ref: ${{ steps.pr-meta.outputs.head-ref }} + token: ${{ steps.bot-token.outputs.token }} + persist-credentials: true + + - name: Run non-LLM fixes + if: steps.skip-check.outputs.skip != 'true' && steps.plan.outputs.has-non-llm-fixes == 'true' + id: non-llm + run: | + echo "Running non-LLM fixes..." + + # Execute each fix command + # SECURITY: Commands come from check-fixers.yml in the trusted main checkout. + # The `eval` is safe because the config is not user-controlled — it's read from + # the main branch by the prompt builder step (which also runs from main). 
+ echo "$NON_LLM_FIXES" | jq -c '.[]' | while IFS= read -r fix; do + job=$(echo "$fix" | jq -r '.job') + command=$(echo "$fix" | jq -r '.command') + echo "::group::Non-LLM fix for ${job}" + echo "Running: ${command}" + eval "$command" || echo "::warning::Non-LLM fix for ${job} had non-zero exit (continuing)" + echo "::endgroup::" + done + + # Check if any files changed + if git diff --quiet && git diff --cached --quiet; then + echo "changes=false" >> "$GITHUB_OUTPUT" + echo "No changes from non-LLM fixes" + else + echo "changes=true" >> "$GITHUB_OUTPUT" + echo "Non-LLM fixes produced changes" + fi + env: + NON_LLM_FIXES: ${{ steps.plan.outputs.non-llm-fixes }} + + - name: Commit and push non-LLM fixes + if: steps.skip-check.outputs.skip != 'true' && steps.non-llm.outputs.changes == 'true' + id: non-llm-push + run: | + git config user.name "egg" + git config user.email "egg@localhost" + git add -u + git commit -m "Fix checks: apply automated formatting fixes" + git push + echo "pushed=true" >> "$GITHUB_OUTPUT" + echo "Non-LLM fixes committed and pushed. CI will re-run." + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + + # Update state after non-LLM fixes (only increment jobs that were attempted) + - name: Update state after non-LLM fixes + if: steps.skip-check.outputs.skip != 'true' && steps.non-llm-push.outputs.pushed == 'true' + run: | + # Data passed via env vars to avoid shell interpolation injection + NEW_STATE=$(python3 -c " + import json, os + state = json.loads(os.environ['CURRENT_STATE']) if os.environ.get('CURRENT_STATE') else {} + attempted = json.loads(os.environ['ATTEMPTED_JOBS']) + workflow = os.environ['FAILED_WORKFLOW'] + for job in attempted: + key = f'{workflow}/{job}' + state[key] = state.get(key, 0) + 1 + print(json.dumps(state, indent=2)) + ") + + BODY=" +
<!-- egg-autofix-state --> + <details><summary>Autofix tracking</summary> + + \`\`\`json + ${NEW_STATE} + \`\`\` + </details>
" + + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + + if [[ -n "$COMMENT_ID" ]]; then + gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}" -X PATCH -f body="$BODY" + else + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + fi + + # Post result comment + RESULT_BODY=" + egg applied automated formatting fixes and pushed. CI will re-run to verify. + + — Authored by egg" + # shellcheck disable=SC2001 + RESULT_BODY=$(echo "$RESULT_BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$RESULT_BODY" + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + ATTEMPTED_JOBS: ${{ steps.plan.outputs.non-llm-jobs }} + CURRENT_STATE: ${{ steps.state.outputs.state }} + COMMENT_ID: ${{ steps.state.outputs.comment-id }} + FAILED_WORKFLOW: ${{ env.FAILED_WORKFLOW }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # If non-LLM fixes were pushed, exit here — CI will re-run + - name: Exit after non-LLM fixes + if: steps.skip-check.outputs.skip != 'true' && steps.non-llm-push.outputs.pushed == 'true' + run: | + echo "Non-LLM fixes pushed. Exiting — CI will re-run and re-trigger if still failing." + + # Fall through to LLM fixer if non-LLM fixes ran but produced no changes + - name: Check non-LLM fallthrough + if: >- + steps.skip-check.outputs.skip != 'true' + && steps.plan.outputs.has-non-llm-fixes == 'true' + && steps.non-llm.outputs.changes == 'false' + id: non-llm-fallthrough + run: | + echo "Non-LLM fixes produced no changes, falling through to LLM fixer" + echo "fallthrough=true" >> "$GITHUB_OUTPUT" + + # Run LLM fixer (when non-LLM didn't apply or didn't resolve). + # needs-llm is true when at least one check still needs LLM fixing + # (checks that exceeded max retries are excluded from the prompt). + # Also runs when non-LLM fixes produced no changes (fallthrough). 
+ - name: Checkout PR branch for LLM fixer + if: >- + steps.skip-check.outputs.skip != 'true' + && (steps.plan.outputs.needs-llm == 'true' || steps.non-llm-fallthrough.outputs.fallthrough == 'true') + && steps.non-llm-push.outputs.pushed != 'true' + uses: actions/checkout@v4 + with: + repository: ${{ steps.pr-meta.outputs.head-repo }} + ref: ${{ steps.pr-meta.outputs.head-ref }} + persist-credentials: false + + - name: Run egg check fixer + if: >- + steps.skip-check.outputs.skip != 'true' + && (steps.plan.outputs.needs-llm == 'true' || steps.non-llm-fallthrough.outputs.fallthrough == 'true') + && steps.non-llm-push.outputs.pushed != 'true' + id: egg + uses: jwbron/egg/action@main + with: + prompt-file: ${{ steps.plan.outputs.prompt-file }} + model: ${{ steps.plan.outputs.model }} + anthropic-oauth-token: ${{ secrets.ANTHROPIC_OAUTH_TOKEN }} + bot-app-id: ${{ secrets.BOT_APP_ID }} + bot-app-private-key: ${{ secrets.BOT_APP_PRIVATE_KEY }} + bot-app-installation-id: ${{ secrets.BOT_APP_INSTALLATION_ID }} + bot-username: ${{ env.EGG_BOT_USERNAME }} + bot-branch-prefix: ${{ env.BRANCH_PREFIX }} + checkpoint-repo: ${{ vars.EGG_CHECKPOINT_REPO || '' }} + timeout: ${{ inputs.timeout }} + + # Update state after LLM fixer (only increment jobs that were attempted by LLM) + - name: Update state after LLM fixer + if: >- + always() && !cancelled() + && steps.skip-check.outputs.skip != 'true' + && (steps.plan.outputs.needs-llm == 'true' || steps.non-llm-fallthrough.outputs.fallthrough == 'true') + && steps.non-llm-push.outputs.pushed != 'true' + run: | + # Data passed via env vars to avoid shell interpolation injection + NEW_STATE=$(python3 -c " + import json, os + state = json.loads(os.environ['CURRENT_STATE']) if os.environ.get('CURRENT_STATE') else {} + attempted = json.loads(os.environ['ATTEMPTED_JOBS']) + workflow = os.environ['FAILED_WORKFLOW'] + for job in attempted: + key = f'{workflow}/{job}' + state[key] = state.get(key, 0) + 1 + print(json.dumps(state, indent=2)) + ") 
+ + BODY=" +
<!-- egg-autofix-state --> + <details><summary>Autofix tracking</summary> + + \`\`\`json + ${NEW_STATE} + \`\`\` + </details>
" + + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + + if [[ -n "$COMMENT_ID" ]]; then + gh api "repos/${GITHUB_REPOSITORY}/issues/comments/${COMMENT_ID}" -X PATCH -f body="$BODY" + else + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + fi + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + COMMENT_ID: ${{ steps.state.outputs.comment-id }} + # In fallthrough, the LLM gets all non-escalated jobs (prompt includes them all). + # Otherwise, only the jobs originally designated for LLM. + ATTEMPTED_JOBS: ${{ steps.non-llm-fallthrough.outputs.fallthrough == 'true' && steps.plan.outputs.all-non-escalated-jobs || steps.plan.outputs.llm-jobs }} + CURRENT_STATE: ${{ steps.state.outputs.state }} + FAILED_WORKFLOW: ${{ env.FAILED_WORKFLOW }} + GITHUB_REPOSITORY: ${{ github.repository }} + + # Post result comment + - name: Post result comment + if: >- + always() && !cancelled() + && steps.skip-check.outputs.skip != 'true' + && steps.non-llm-push.outputs.pushed != 'true' + run: | + RUN_URL="${SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}" + + if [[ "$MAX_RETRIES_REACHED" == "true" && "$NEEDS_LLM" != "true" ]]; then + # All checks exceeded max retries, escalation comment already posted + echo "Escalation comment already posted, skipping result comment" + elif [[ "$EGG_OUTCOME" == "success" ]]; then + BODY=" + egg check fixer completed for **${FAILED_WORKFLOW}**. CI will re-run to verify. [View run logs](${RUN_URL}) + + — Authored by egg" + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + elif [[ "$EGG_OUTCOME" == "failure" ]]; then + BODY=" + egg check fixer encountered an issue fixing **${FAILED_WORKFLOW}**. 
[View run logs](${RUN_URL}) + + — Authored by egg" + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + else + # EGG_OUTCOME is empty or unexpected (e.g., egg step was skipped due to + # an earlier step failure). Post a status-unknown comment so the run + # doesn't complete silently. + BODY=" + egg check fixer status unknown for **${FAILED_WORKFLOW}** (fixer step may have been skipped). [View run logs](${RUN_URL}) + + — Authored by egg" + # shellcheck disable=SC2001 + BODY=$(echo "$BODY" | sed 's/^[[:space:]]*//') + gh api "repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" -f body="$BODY" + fi + env: + GH_TOKEN: ${{ steps.bot-token.outputs.token }} + MAX_RETRIES_REACHED: ${{ steps.plan.outputs.max-retries-reached }} + NEEDS_LLM: ${{ steps.plan.outputs.needs-llm }} + EGG_OUTCOME: ${{ steps.egg.outcome }} + SERVER_URL: ${{ github.server_url }} + GITHUB_REPOSITORY: ${{ github.repository }} + RUN_ID: ${{ github.run_id }} diff --git a/action/autofixer-conventions.md b/action/autofixer-conventions.md index a5cc1fcb3..93d63d3d1 100644 --- a/action/autofixer-conventions.md +++ b/action/autofixer-conventions.md @@ -1,8 +1,22 @@ # Autofixer Conventions (GitHub Actions) -Operational conventions specific to the GitHub Actions autofixer. -General autofixer rules (workflow, decision framework, etc.) are in -`shared/prompts/autofixer-rules.md`. +Operational conventions specific to the per-check GitHub Actions autofixer. +These conventions replace `shared/prompts/autofixer-rules.md` for the per-check +fixer context (the shared rules instruct running checks locally, which conflicts +with the CI-driven model below). + +## Per-Check Fixer Model + +The autofixer operates in a CI-driven loop: +1. CI check fails → fixer is invoked with the specific failed checks +2. Fixer investigates and fixes only those checks +3. Fixer pushes fixes (does NOT re-run checks locally) +4. 
CI re-runs automatically after push +5. If still failing → fixer is re-invoked (up to max retries) +6. If max retries exceeded → escalation comment posted for human + +**Do NOT run checks locally.** CI validates after each push. Running checks +locally wastes agent compute — CI already does this. ## Lint Workflow Structure @@ -23,8 +37,8 @@ that job. For example, a "Python" job failure might be from ruff or mypy. ## Investigating Failures -Use `gh pr checks ` to list all checks and their status. For failed -checks, fetch the logs to understand the error: +Use `gh run view --log-failed` to see the failure output. For broader +context: ```bash # List checks @@ -42,17 +56,17 @@ file to understand what commands are being run. ## Committing Fixes -**Only commit after ALL local checks pass.** Do not push partial fixes. +Fix the issues identified from CI logs, then commit and push: ```bash -# After verifying all checks pass locally: git add git commit -m "Fix checks: " git push ``` -If fixing multiple distinct issues, you may use separate commits for clarity, but -push them all together in a single push after verifying all checks pass. +**Do NOT run checks locally before pushing.** CI will re-run automatically. +If fixes don't resolve the issue, the fixer will be re-invoked with updated +failure context. **Why single push matters:** Each push triggers CI. Fixing one issue at a time causes the workflow to run repeatedly, wasting CI resources and time. diff --git a/action/build-autofixer-prompt.sh b/action/build-autofixer-prompt.sh index bdc220b86..c0646d01c 100755 --- a/action/build-autofixer-prompt.sh +++ b/action/build-autofixer-prompt.sh @@ -1,4 +1,8 @@ #!/usr/bin/env bash +# DEPRECATED: Use build-check-fixer-prompt.sh instead. +# This script is kept for external repos that still reference it. +# New repos should use reusable-check-fixer.yml with build-check-fixer-prompt.sh. 
+# # build-autofixer-prompt.sh — Build a minimal prompt for agent-driven check autofix # # This script creates a minimal prompt that tells Claude to investigate check diff --git a/action/build-check-fixer-prompt.sh b/action/build-check-fixer-prompt.sh new file mode 100755 index 000000000..a24ffd32b --- /dev/null +++ b/action/build-check-fixer-prompt.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# build-check-fixer-prompt.sh — Build a per-check focused prompt for autofixing +# +# Replaces build-autofixer-prompt.sh with a per-check fixer approach: +# - Reads check-fixers.yml config for per-job settings +# - Reads autofix state from PR comment to track retry counts +# - Identifies which jobs failed in the triggering workflow run +# - Outputs a focused prompt listing ONLY failed jobs +# - Outputs non-LLM fix commands for applicable checks +# +# Environment variables: +# PR_NUMBER — Pull request number +# GITHUB_REPOSITORY — owner/repo +# FAILED_WORKFLOW — Name of the workflow that failed +# FAILED_RUN_ID — Run ID of the failed workflow +# FAILED_JOBS — JSON array of failed job names (from caller) +# AUTOFIX_STATE — JSON object of retry counts (from caller) +# CONFIG_FILE — Path to check-fixers.yml (from caller, optional) +# RUNNER_TEMP — Temp directory for prompt file +# +# Output (via $GITHUB_OUTPUT): +# prompt-file — Path to the focused prompt for the LLM fixer +# model — Model to use +# non-llm-fixes — JSON array of {job, command} objects +# has-non-llm-fixes — true/false +# needs-llm — true/false +# max-retries-reached — true/false (escalation needed) +# escalation-details — JSON array of {job, attempts, max} for exceeded checks +# non-llm-jobs — JSON array of job names attempted by non-LLM fixes +# llm-jobs — JSON array of job names attempted by LLM fixer +# all-non-escalated-jobs — JSON array of all non-escalated job names (in prompt) + +set -euo pipefail + +# --------------------------------------------------------------------------- + +SCRIPT_DIR="$(cd "$(dirname 
"$0")" && pwd)" +REPO_ROOT="${SCRIPT_DIR}/.." + +# --------------------------------------------------------------------------- +# Build the prompt +# --------------------------------------------------------------------------- + +build_prompt() { + local config_file="" + if [[ -n "${CONFIG_FILE:-}" && -f "${CONFIG_FILE}" ]]; then + config_file="${CONFIG_FILE}" + elif [[ -f ".egg/check-fixers.yml" ]]; then + config_file=".egg/check-fixers.yml" + elif [[ -f "${REPO_ROOT}/shared/check-fixers.yml" ]]; then + config_file="${REPO_ROOT}/shared/check-fixers.yml" + fi + + # Parse failed jobs (JSON array from caller) + local failed_jobs_json="${FAILED_JOBS:-[]}" + local autofix_state_json="${AUTOFIX_STATE:-{}}" + + # If no failed jobs provided, we can't build a focused prompt + if [[ "$failed_jobs_json" == "[]" ]]; then + echo "::warning::No failed jobs provided, building generic prompt" + failed_jobs_json='["unknown"]' + fi + + # Determine non-LLM fixes, model, and retry state per job + local non_llm_fixes="[]" + local needs_llm="false" + local has_non_llm="false" + local max_retries_reached="false" + local escalation_details="[]" + local model="" + local failed_job_list="" + + if [[ -n "$config_file" ]]; then + # Use Python to process all job configs at once. + # Data is passed via environment variables (not shell interpolation) + # to prevent code injection from crafted JSON payloads. 
+ local result + result=$(CONFIG_PATH="$config_file" \ + WORKFLOW_NAME="${FAILED_WORKFLOW}" \ + JOBS_JSON="${failed_jobs_json}" \ + STATE_JSON="${autofix_state_json}" \ + python3 -c " +import yaml, json, sys, os + +config_file = os.environ['CONFIG_PATH'] +workflow = os.environ['WORKFLOW_NAME'] +failed_jobs = json.loads(os.environ['JOBS_JSON']) +state = json.loads(os.environ['STATE_JSON']) + +with open(config_file) as f: + cfg = yaml.safe_load(f) or {} + +defaults = cfg.get('defaults', {}) +workflows = cfg.get('workflows', {}) +wf = workflows.get(workflow, {}) + +non_llm_fixes = [] +escalation = [] +model = defaults.get('model', 'sonnet') +needs_llm = False +has_non_llm = False +max_retries_reached = False +jobs_for_llm = [] +all_non_escalated = [] # All jobs not yet escalated (for prompt + fallthrough) + +for job in failed_jobs: + job_cfg = wf.get(job, {}) + job_max = job_cfg.get('max_retries', defaults.get('max_retries', 3)) + job_model = job_cfg.get('model', defaults.get('model', 'sonnet')) + state_key = f'{workflow}/{job}' + attempts = state.get(state_key, 0) + + # Check max retries + if attempts >= job_max: + max_retries_reached = True + escalation.append({'job': job, 'attempts': attempts, 'max': job_max}) + continue + + all_non_escalated.append(job) + + # Check for non-LLM fix (only on first attempt) + non_llm_cmd = job_cfg.get('non_llm_fix', '') + if non_llm_cmd and attempts == 0: + has_non_llm = True + non_llm_fixes.append({'job': job, 'command': non_llm_cmd.strip()}) + else: + needs_llm = True + jobs_for_llm.append(job) + + # Use the highest-tier model among non-escalated failed jobs + if job_model == 'opus': + model = 'opus' + +result = { + 'non_llm_fixes': non_llm_fixes, + 'needs_llm': needs_llm, + 'has_non_llm': has_non_llm, + 'max_retries_reached': max_retries_reached, + 'escalation': escalation, + 'model': model, + 'jobs_for_llm': jobs_for_llm, + 'non_llm_jobs': [f['job'] for f in non_llm_fixes], + # All non-escalated jobs go into the prompt so that 
fallthrough + # to LLM has full context even if needs_llm was initially false. + 'all_non_escalated': all_non_escalated, +} +print(json.dumps(result)) +" || { + # Log warning to stderr (GitHub Actions still picks up ::warning:: from stderr in run blocks) + echo "::warning::Failed to parse check-fixers.yml config, falling through to LLM for all jobs" >&2 + # Return valid fallback JSON on stdout + echo '{"non_llm_fixes":[],"needs_llm":true,"has_non_llm":false,"max_retries_reached":false,"escalation":[],"model":"sonnet","jobs_for_llm":[],"non_llm_jobs":[],"all_non_escalated":[]}' + }) + + non_llm_fixes=$(echo "$result" | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)['non_llm_fixes']))") + needs_llm=$(echo "$result" | python3 -c "import json,sys; print(str(json.load(sys.stdin)['needs_llm']).lower())") + has_non_llm=$(echo "$result" | python3 -c "import json,sys; print(str(json.load(sys.stdin)['has_non_llm']).lower())") + max_retries_reached=$(echo "$result" | python3 -c "import json,sys; print(str(json.load(sys.stdin)['max_retries_reached']).lower())") + escalation_details=$(echo "$result" | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)['escalation']))") + model=$(echo "$result" | python3 -c "import json,sys; print(json.load(sys.stdin)['model'])") + # Prompt lists ALL non-escalated jobs (not just jobs_for_llm) so that + # fallthrough from non-LLM fixes gives the LLM full context. 
+ failed_job_list=$(echo "$result" | python3 -c "import json,sys; print('\n'.join(json.load(sys.stdin)['all_non_escalated']))") + local non_llm_jobs llm_jobs all_non_escalated_jobs + non_llm_jobs=$(echo "$result" | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)['non_llm_jobs']))") + llm_jobs=$(echo "$result" | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)['jobs_for_llm']))") + all_non_escalated_jobs=$(echo "$result" | python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)['all_non_escalated']))") + else + # No config file — all jobs need LLM, default model + needs_llm="true" + model="sonnet" + failed_job_list=$(echo "$failed_jobs_json" | python3 -c "import json,sys; print('\n'.join(json.load(sys.stdin)))") + local non_llm_jobs="[]" + local llm_jobs="$failed_jobs_json" + local all_non_escalated_jobs="$failed_jobs_json" + fi + + # Build the failed checks section for the prompt + local failed_checks_section="" + if [[ -n "$failed_job_list" ]]; then + while IFS= read -r job; do + [[ -z "$job" ]] && continue + failed_checks_section="${failed_checks_section} +- **${job}**" + done <<< "$failed_job_list" + fi + + # Load conventions (per-check fixer specific) + local conventions_file="${SCRIPT_DIR}/autofixer-conventions.md" + local conventions="" + if [[ -f "$conventions_file" ]]; then + conventions=$(cat "$conventions_file") + fi + + # Build the focused prompt. + # NOTE: We intentionally do NOT include shared/prompts/autofixer-rules.md here + # because it instructs the agent to run checks locally, which conflicts with the + # per-check CI-driven model. The conventions file contains the relevant rules. + local run_log_cmd="" + if [[ -n "${FAILED_RUN_ID:-}" ]]; then + run_log_cmd="gh run view ${FAILED_RUN_ID} --log-failed" + else + run_log_cmd="gh pr checks ${PR_NUMBER}" + fi + + local prompt + prompt="Fix failing checks in the **${FAILED_WORKFLOW}** workflow on PR #${PR_NUMBER} in ${GITHUB_REPOSITORY}. 
+ +## Failed Checks +${failed_checks_section} + +## Instructions + +1. **Investigate the failure**: Run \`${run_log_cmd}\` to see the failure output. +2. **Fix the issues** causing the failures listed above. +3. **Commit and push** your fixes. + +**CRITICAL: Do NOT run checks locally.** CI will re-run automatically after you push. +Fix only the issues listed above. Do not fix unrelated code. + +If you cannot fix an issue without human guidance, post a PR comment explaining +what's needed and why. + +## Auto-fixable vs Report-only + +**Auto-fixable (commit fixes directly):** +- Lint errors (formatting, import order, code style) +- Type errors with clear fixes +- Simple test failures with obvious fixes +- Missing or outdated dependencies in lock files + +**Report only (explain what's needed):** +- Complex logic errors requiring design decisions +- Security issues requiring architectural changes +- Failures that require understanding business requirements to resolve correctly + +## Conventions + +${conventions:-Use git commit and git push to push fixes. 
Sign comments with: -- Authored by egg} +" + + # Write prompt to temp file + local prompt_dir="${RUNNER_TEMP:-/tmp}" + mkdir -p "$prompt_dir" + local prompt_file="${prompt_dir}/check-fixer-prompt-${PR_NUMBER}.txt" + echo "$prompt" > "$prompt_file" + + # Write outputs + { + echo "prompt-file=${prompt_file}" + echo "model=${model}" + echo "non-llm-fixes=${non_llm_fixes}" + echo "has-non-llm-fixes=${has_non_llm}" + echo "needs-llm=${needs_llm}" + echo "max-retries-reached=${max_retries_reached}" + echo "escalation-details=${escalation_details}" + echo "non-llm-jobs=${non_llm_jobs}" + echo "llm-jobs=${llm_jobs}" + echo "all-non-escalated-jobs=${all_non_escalated_jobs}" + } >> "${GITHUB_OUTPUT:-/dev/null}" + + echo "Check fixer prompt built: ${#prompt} chars, model=${model}, has_non_llm=${has_non_llm}, needs_llm=${needs_llm}" +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +: "${PR_NUMBER:?PR_NUMBER is required}" +: "${GITHUB_REPOSITORY:?GITHUB_REPOSITORY is required}" + +build_prompt diff --git a/docs/guides/github-automation.md b/docs/guides/github-automation.md index 87905d73e..f36f5c160 100644 --- a/docs/guides/github-automation.md +++ b/docs/guides/github-automation.md @@ -28,6 +28,7 @@ Review criteria for each workflow are defined in `shared/prompts/` as markdown f | `shared/prompts/code-review-criteria.md` | AI Code Review, orchestrator reviewers | | `shared/prompts/agent-design-criteria.md` | Design Review, orchestrator reviewers | | `shared/prompts/autofixer-rules.md` | Check Autofixer | +| `shared/check-fixers.yml` | Check Autofixer (per-job config: non-LLM fixes, retries, model) | | `shared/prompts/contract-review-criteria.md` | Contract Verification, orchestrator reviewers | | `shared/prompts/onboarding-docs-prompt.md` | Documentation Onboarding (`egg-onboarding-docs`) | @@ -293,21 +294,67 @@ Contract files follow the schema at 
`.egg/schemas/contract.schema.json`. The wor ## Check Autofixer **Workflow:** [`.github/workflows/on-check-failure.yml`](../../.github/workflows/on-check-failure.yml) +**Framework:** [`.github/workflows/reusable-check-fixer.yml`](../../.github/workflows/reusable-check-fixer.yml) +**Config:** [`shared/check-fixers.yml`](../../shared/check-fixers.yml) Triggers when `Lint`, `Test`, or `Integration Tests` workflows fail on a PR, or via `workflow_dispatch`. +Uses a per-check fixer loop where CI validates after each fix attempt. ### How It Works 1. **Skip check** — Skips PRs with `[skip-autofix]` in the title. -2. **Comment cleanup** — Minimizes previous "investigating" comments to reduce clutter. -3. **Acknowledgment** — Posts a comment linking to the failed workflow run. -4. **Trusted prompt build** — Builds the autofixer prompt from `main` using - `build-autofixer-prompt.sh`, which includes the failed workflow name and run ID. -5. **Investigation** — The agent uses `gh pr checks` to list failures, examines logs - via `gh run view --log-failed`, and reads workflow files for context. -6. **Fix or report** — Auto-fixable issues (lint, formatting, simple type errors) are - fixed, committed, and pushed. Complex issues get a comment explaining the problem - and suggested next steps. +2. **Identify failed jobs** — Queries the GitHub API for which specific jobs failed + in the triggering workflow run (e.g., Python, Shell within Lint). +3. **Read autofix state** — Reads retry counts from a hidden state-marker + PR comment to track how many times each check has been attempted. +4. **Comment cleanup** — Minimizes previous status comments to reduce clutter. +5. **Build fix plan** — Runs `build-check-fixer-prompt.sh` from `main` (trusted), + which reads `check-fixers.yml` config and determines: + - Which checks have non-LLM fixes available (ruff, shfmt, etc.) + - Which checks need the LLM fixer + - Which checks have exceeded max retries (escalation needed) +6.
**Phase 1: Non-LLM fixes** — On first attempt, runs mechanical fixes (e.g., + `ruff format`, `shfmt`) for applicable checks. If changes are produced, commits, + pushes, and **exits** — CI re-runs with fresh context. +7. **Phase 2: LLM fixer** — If non-LLM fixes didn't apply or didn't resolve the + issue, runs a focused LLM agent with a prompt listing only the specific failed + jobs. The agent fixes and pushes but does **not** re-run checks locally. +8. **State update** — Increments attempt counts for each failed check in the + state comment. +9. **Escalation** — When any check exceeds its `max_retries`, posts an escalation + comment listing the checks that need human attention. + +### CI-Driven Loop + +The fixer operates in a loop driven by CI: +``` +CI fails → fixer fixes → pushes → CI re-runs → still fails? → fixer re-invoked +``` + +The fixer does **not** run checks locally. This avoids wasting agent compute +on re-running checks that CI already handles. Each push triggers CI, which +re-triggers the fixer if checks still fail. + +### Non-LLM Fixes + +Mechanical fixes run before the LLM fixer on first attempt. Configured per job +in `check-fixers.yml`: + +| Check | Non-LLM Fix | +|-------|-------------| +| Lint / Python | `ruff check --fix --unsafe-fixes` + `ruff format` | +| Lint / Shell | `shfmt` formatting | +| Lint / YAML | Trailing whitespace removal + final newline | + +If non-LLM fixes produce changes, they are committed and pushed immediately. +CI re-runs, and if the check still fails, the LLM fixer handles it on the +next attempt. + +### Retry and Escalation + +Each check has a configurable `max_retries` (default: 3). State is tracked +in a PR comment with a JSON payload. When a check exceeds its max retries, +an escalation comment is posted requesting human intervention. 
### Auto-Fix vs Report @@ -318,6 +365,25 @@ The agent follows these rules (customizable via `.egg/autofixer-rules.md`): | **Auto-fix** | Lint errors, formatting, import order, type errors with clear fixes, simple test failures | | **Report only** | Complex logic errors, security issues, unclear requirements, missing environment config | +### Concurrency + +Fixers serialize per PR using `cancel-in-progress: false`. When both Lint and +Test fail simultaneously, the two `workflow_run` events queue and execute +sequentially rather than one canceling the other. + +### Configuration + +Per-job settings in `check-fixers.yml`: + +| Setting | Default | Purpose | +|---------|---------|---------| +| `model` | `sonnet` | LLM model for this check | +| `timeout` | `15` | Minutes before timeout | +| `max_retries` | `3` | Max fix attempts before escalation | +| `non_llm_fix` | (none) | Shell commands for mechanical fixing | + +Repos can override by placing `.egg/check-fixers.yml` in their repository. + ## Conflict Resolver **Workflow:** [`.github/workflows/on-merge-conflict.yml`](../../.github/workflows/on-merge-conflict.yml) @@ -536,6 +602,7 @@ This variable controls who can trigger the Address Review Feedback workflow thro |------|---------| | `.egg/review-rules.md` | Custom review focus areas (overrides defaults) | | `.egg/autofixer-rules.md` | Custom auto-fix vs report rules (overrides defaults) | +| `.egg/check-fixers.yml` | Custom per-check fixer config (overrides `shared/check-fixers.yml`) | | `.egg/conflict-rules.md` | Custom conflict resolution rules (overrides defaults) | ### Skip Labels diff --git a/shared/check-fixers.yml b/shared/check-fixers.yml new file mode 100644 index 000000000..7b17058be --- /dev/null +++ b/shared/check-fixers.yml @@ -0,0 +1,50 @@ +# Check fixer configuration +# +# Maps CI workflow jobs to their non-LLM fix commands, max retries, model, +# and timeout settings. Used by build-check-fixer-prompt.sh and +# reusable-check-fixer.yml. 
+# +# Repos can override by placing .egg/check-fixers.yml in their repository. + +version: "1" + +defaults: + model: "sonnet" + timeout: "15" + max_retries: 3 + +workflows: + Lint: + Python: + non_llm_fix: | + pip install -q uv 2>/dev/null || true + uv sync --extra dev 2>/dev/null || true + .venv/bin/ruff check --fix --unsafe-fixes . + .venv/bin/ruff format . + max_retries: 3 + Shell: + non_llm_fix: | + if command -v shfmt >/dev/null 2>&1; then + find . -name "*.sh" -not -path "./.venv/*" -not -path "./.git/*" | xargs shfmt -w -i 2 -ci -bn + fi + max_retries: 2 + YAML: + non_llm_fix: | + find . \( -name "*.yaml" -o -name "*.yml" \) -not -path "./.venv/*" -not -path "./.git/*" | while read -r f; do + sed -i 's/[[:space:]]*$//' "$f" + [ -n "$(tail -c1 "$f")" ] && echo "" >> "$f" + done + max_retries: 2 + Docker: + max_retries: 2 + Actions: + max_retries: 2 + "Custom Checks": + max_retries: 2 + Test: + "Unit Tests": + model: "opus" + timeout: "15" + max_retries: 2 + "Security Scan": + max_retries: 2