# arXiv Monitor (workflow #10)
# NOTE(review): the original paste carried GitHub blob-viewer banner text here
# ("hidden or bidirectional Unicode characters"); that was UI chrome, not file
# content, and has been replaced by this comment header.
---
# GitHub Actions workflow: periodically scan arXiv for new papers, analyze
# the candidates, and open issues for the top recommendations.
name: arXiv Monitor

on:
  # Every 6 hours, plus manual runs with overridable parameters.
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:
    inputs:
      categories:
        description: 'arXiv categories'
        required: false
        default: 'cs.AI,cs.LG,cs.CL,cs.CV,cs.RO,cs.IR,cs.NE,stat.ML'
        type: string
      max_recommended:
        description: 'Max recommended papers to create issues'
        required: false
        default: '2'
        type: string

# Never run two scans concurrently; a newer trigger cancels an in-flight run.
concurrency:
  group: arxiv-monitor
  cancel-in-progress: true

permissions:
  issues: write
  contents: read
  actions: write  # needed to create/update the ARXIV_LAST_SCAN repo variable

env:
  # Manual-dispatch inputs win; scheduled runs fall back to the defaults
  # (which mirror the workflow_dispatch input defaults above).
  CATEGORIES: ${{ github.event.inputs.categories || 'cs.AI,cs.LG,cs.CL,cs.CV,cs.RO,cs.IR,cs.NE,stat.ML' }}
  MAX_RECOMMENDED: ${{ github.event.inputs.max_recommended || '2' }}
  # Fetch extra candidate papers for analysis; issues are created only for
  # the filtered recommendations.
  FETCH_PAPERS: "20"
  GH_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
  # DEBUG logging exposes the agent's full execution trace.
  LOG_LEVEL: DEBUG
  MCP_LOG_DETAIL: "1"
  PROMPT_LOG: "1"
  # Anthropic-compatible API configuration (secrets with public fallbacks).
  ANTHROPIC_AUTH_TOKEN: ${{ secrets.ANTHROPIC_AUTH_TOKEN }}
  ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL || 'https://api.minimaxi.com/anthropic' }}
  ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL || 'MiniMax-M2.1' }}
  CLAUDE_AGENT_SDK_SKIP_VERSION_CHECK: "true"
jobs:
  monitor:
    # Guard: only run in the canonical repository, never on forks.
    if: github.repository == 'gqy20/IssueLab'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: astral-sh/setup-uv@v7
        with:
          python-version: '3.13'
          enable-cache: true

      - run: uv sync

      # Read the persisted last-scan timestamp (repository Actions variable);
      # fall back to "7 days ago" when it is missing or not a strict
      # YYYY-MM-DDTHH:MM:SSZ timestamp.
      - name: Get last scan time
        id: last_scan
        run: |
          LAST_SCAN=$(gh api -X GET "repos/${{ github.repository }}/actions/variables/ARXIV_LAST_SCAN" --jq '.value' 2>/dev/null || true)
          if [[ "$LAST_SCAN" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$ ]]; then
            echo "Using persisted ARXIV_LAST_SCAN: $LAST_SCAN"
            echo "last_scan=$LAST_SCAN" >> $GITHUB_OUTPUT
          else
            LAST_SCAN=$(date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ)
            echo "ARXIV_LAST_SCAN invalid/not found, fallback to: $LAST_SCAN"
            echo "last_scan=$LAST_SCAN" >> $GITHUB_OUTPUT
          fi

      # Fetch FETCH_PAPERS candidates, analyze, and create at most
      # MAX_RECOMMENDED issues; the full log is teed into the observability
      # artifact directory for later upload.
      - name: Fetch, analyze & create issues
        run: |
          set -euo pipefail
          mkdir -p artifacts/observability
          echo "🔍 扫描 arXiv 新论文..."
          echo "📥 获取 ${{ env.FETCH_PAPERS }} 篇候选论文供分析"
          echo ""
          uv run python scripts/monitor_arxiv.py \
            --token "${{ env.GH_TOKEN }}" \
            --repo "${{ github.repository }}" \
            --categories "${{ env.CATEGORIES }}" \
            --max-papers ${{ env.FETCH_PAPERS }} \
            --max-recommended ${{ env.MAX_RECOMMENDED }} \
            --last-scan "${{ steps.last_scan.outputs.last_scan }}" \
            --output artifacts/arxiv_candidates.json \
            --metrics-output artifacts/observability/arxiv_metrics.json \
            2>&1 | tee artifacts/observability/arxiv_monitor.log

      # Persist the new scan time only after a successful fetch (no
      # `if: always()` here), so a failed run is retried over the same window.
      # PATCH updates an existing variable; on failure, POST creates it.
      - name: Update last scan time
        run: |
          NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
          echo "Updating ARXIV_LAST_SCAN to: $NOW"
          if gh api -X PATCH "repos/${{ github.repository }}/actions/variables/ARXIV_LAST_SCAN" \
               -f name='ARXIV_LAST_SCAN' \
               -f value="$NOW" >/dev/null 2>&1; then
            echo "Updated existing ARXIV_LAST_SCAN"
          else
            gh api -X POST "repos/${{ github.repository }}/actions/variables" \
              -f name='ARXIV_LAST_SCAN' \
              -f value="$NOW" >/dev/null
            echo "Created ARXIV_LAST_SCAN"
          fi

      # Fold the monitor script's metrics file into a schema-versioned JSON
      # payload plus a step-summary table; runs even when earlier steps fail.
      - name: Emit observability summary
        if: always()
        env:
          WF_NAME: ${{ github.workflow }}
          JOB_NAME: monitor
          RUN_ID: ${{ github.run_id }}
          REPO_NAME: ${{ github.repository }}
          SHA: ${{ github.sha }}
        run: |
          set -euo pipefail
          mkdir -p artifacts/observability
          python - << 'PY'
          import json
          import os
          from datetime import UTC, datetime
          from pathlib import Path

          # Metrics file may be absent or corrupt when the monitor step
          # failed early; degrade to an empty dict in both cases.
          metrics_path = Path("artifacts/observability/arxiv_metrics.json")
          m = {}
          if metrics_path.exists():
              try:
                  m = json.loads(metrics_path.read_text(encoding="utf-8"))
              except Exception:
                  m = {}

          # `or 0` guards against explicit nulls in the metrics file.
          input_count = int(m.get("fetched_count", 0) or 0)
          success_count = int(m.get("created_issues", 0) or 0)
          failed_count = 0
          skipped_count = 0
          status_raw = str(m.get("status", ""))
          failures = []
          # These statuses mean the run legitimately produced nothing.
          if status_raw in {"no_recommendation", "insufficient_candidates", "no_new_after_dedupe", "no_new_papers"}:
              skipped_count = 1
          # "started" means the script never wrote final metrics.
          if status_raw == "started":
              failures.append({"code": "METRICS_MISSING", "count": 1, "samples": []})
          status = "success"
          if failures:
              status = "partial"
          elif skipped_count > 0 and success_count == 0:
              status = "skipped"

          payload = {
              "schema_version": "v1",
              "workflow": os.getenv("WF_NAME"),
              "job": os.getenv("JOB_NAME"),
              "run_id": os.getenv("RUN_ID"),
              "repo": os.getenv("REPO_NAME"),
              "sha": os.getenv("SHA"),
              "finished_at": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
              "metrics": {
                  "input_count": input_count,
                  "success_count": success_count,
                  "failed_count": failed_count,
                  "skipped_count": skipped_count,
                  "new_papers_count": int(m.get("new_papers_count", 0) or 0),
                  "recommended_count": int(m.get("recommended_count", 0) or 0),
                  "created_issues": int(m.get("created_issues", 0) or 0),
              },
              "failures_topn": failures[:5],
              "status": status,
          }
          Path("artifacts/observability/arxiv_monitor__monitor.json").write_text(
              json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
          )

          # Markdown table appended to the job's step summary.
          summary = [
              "## Observability (arxiv monitor)",
              "",
              "| metric | value |",
              "|---|---:|",
              f"| input_count | {payload['metrics']['input_count']} |",
              f"| new_papers_count | {payload['metrics']['new_papers_count']} |",
              f"| recommended_count | {payload['metrics']['recommended_count']} |",
              f"| created_issues | {payload['metrics']['created_issues']} |",
              f"| status | {payload['status']} |",
          ]
          with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as fh:
              fh.write("\n".join(summary) + "\n")
          PY

      - name: Upload observability artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: arxiv-monitor-observability-${{ github.run_id }}
          path: artifacts/observability/
          retention-days: 14