
arXiv Monitor #10

Workflow file for this run

name: arXiv Monitor
on:
  schedule:
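    # Run every 6 hours, at minute 0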
    - cron: '0 */6 * * *'
  workflow_dispatch:
    inputs:
      categories:
        description: 'arXiv categories'
        required: false
        default: 'cs.AI,cs.LG,cs.CL,cs.CV,cs.RO,cs.IR,cs.NE,stat.ML'
        type: string
      max_recommended:
        description: 'Max recommended papers to create issues'
        required: false
        default: '2'
        type: string
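# Only one monitor run at a time; a newly queued run cancels any in-progress run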
concurrency:
  group: arxiv-monitor
  cancel-in-progress: true
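# issues: write lets the monitor open paper issues; actions: write is presumably
# needed so the job can create/update the ARXIV_LAST_SCAN repository variable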
permissions:
  issues: write
  contents: read
  actions: write
env:
  CATEGORIES: ${{ github.event.inputs.categories || 'cs.AI,cs.LG,cs.CL,cs.CV,cs.RO,cs.IR,cs.NE,stat.ML' }}
  MAX_RECOMMENDED: ${{ github.event.inputs.max_recommended || '2' }}
  # Fetch more papers for intelligent analysis; filter and create issues after analysis
  FETCH_PAPERS: 20
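  # Prefer a PAT when configured; otherwise fall back to the default workflow token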
  GH_TOKEN: ${{ secrets.PAT_TOKEN || github.token }}
  # Enable DEBUG logging to show the agent's full execution trace
  LOG_LEVEL: DEBUG
  MCP_LOG_DETAIL: "1"
  PROMPT_LOG: "1"
  # Anthropic API configuration
  ANTHROPIC_AUTH_TOKEN: ${{ secrets.ANTHROPIC_AUTH_TOKEN }}
  ANTHROPIC_BASE_URL: ${{ secrets.ANTHROPIC_BASE_URL || 'https://api.minimaxi.com/anthropic' }}
  ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL || 'MiniMax-M2.1' }}
  CLAUDE_AGENT_SDK_SKIP_VERSION_CHECK: "true"
jobs:
  monitor:
    if: github.repository == 'gqy20/IssueLab'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          python-version: '3.13'
          enable-cache: true
      - run: uv sync
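      # Read the persisted scan cursor (repository Actions variable ARXIV_LAST_SCAN);
      # fall back to a 7-day lookback if it is missing or malformed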
      - name: Get last scan time
        id: last_scan
        run: |
          LAST_SCAN=$(gh api -X GET "repos/${{ github.repository }}/actions/variables/ARXIV_LAST_SCAN" --jq '.value' 2>/dev/null || true)
          if [[ "$LAST_SCAN" =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$ ]]; then
            echo "Using persisted ARXIV_LAST_SCAN: $LAST_SCAN"
            echo "last_scan=$LAST_SCAN" >> $GITHUB_OUTPUT
          else
            LAST_SCAN=$(date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ)
            echo "ARXIV_LAST_SCAN invalid/not found, fallback to: $LAST_SCAN"
            echo "last_scan=$LAST_SCAN" >> $GITHUB_OUTPUT
          fi
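      # Fetch candidates published since the last scan, analyze them, and open
      # issues only for the top recommendations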
      - name: Fetch, analyze & create issues
        run: |
          set -euo pipefail
          mkdir -p artifacts/observability
          echo "🔍 Scanning arXiv for new papers..."
          echo "📥 Fetching ${{ env.FETCH_PAPERS }} candidate papers for analysis"
          echo ""
          uv run python scripts/monitor_arxiv.py \
            --token "${{ env.GH_TOKEN }}" \
            --repo "${{ github.repository }}" \
            --categories "${{ env.CATEGORIES }}" \
            --max-papers ${{ env.FETCH_PAPERS }} \
            --max-recommended ${{ env.MAX_RECOMMENDED }} \
            --last-scan "${{ steps.last_scan.outputs.last_scan }}" \
            --output artifacts/arxiv_candidates.json \
            --metrics-output artifacts/observability/arxiv_metrics.json \
            2>&1 | tee artifacts/observability/arxiv_monitor.log
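      # Persist the new cursor: PATCH updates an existing ARXIV_LAST_SCAN variable,
      # POST creates it on the first run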
      - name: Update last scan time
        run: |
          NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
          echo "Updating ARXIV_LAST_SCAN to: $NOW"
          if gh api -X PATCH "repos/${{ github.repository }}/actions/variables/ARXIV_LAST_SCAN" \
            -f name='ARXIV_LAST_SCAN' \
            -f value="$NOW" >/dev/null 2>&1; then
            echo "Updated existing ARXIV_LAST_SCAN"
          else
            gh api -X POST "repos/${{ github.repository }}/actions/variables" \
              -f name='ARXIV_LAST_SCAN' \
              -f value="$NOW" >/dev/null
            echo "Created ARXIV_LAST_SCAN"
          fi
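      # Normalize the monitor's metrics into a summary JSON artifact and a
      # step-summary table, even when earlier steps fail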
      - name: Emit observability summary
        if: always()
        env:
          WF_NAME: ${{ github.workflow }}
          JOB_NAME: monitor
          RUN_ID: ${{ github.run_id }}
          REPO_NAME: ${{ github.repository }}
          SHA: ${{ github.sha }}
        run: |
          set -euo pipefail
          mkdir -p artifacts/observability
          python - << 'PY'
          import json
          import os
          from datetime import UTC, datetime
          from pathlib import Path
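          # Illustrative shape of arxiv_metrics.json (an assumption; only the keys
          # read below are relied on), e.g.
          # {"fetched_count": 20, "new_papers_count": 5, "recommended_count": 2,
          #  "created_issues": 2, "status": "success"}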
          metrics_path = Path("artifacts/observability/arxiv_metrics.json")
          m = {}
          if metrics_path.exists():
              try:
                  m = json.loads(metrics_path.read_text(encoding="utf-8"))
              except Exception:
                  m = {}
          input_count = int(m.get("fetched_count", 0) or 0)
          success_count = int(m.get("created_issues", 0) or 0)
          failed_count = 0
          skipped_count = 0
          status_raw = str(m.get("status", ""))
          failures = []
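          # "Nothing new to do" outcomes are reported as skipped rather than failed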
          if status_raw in {"no_recommendation", "insufficient_candidates", "no_new_after_dedupe", "no_new_papers"}:
              skipped_count = 1
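          # A status still at "started" indicates the monitor never wrote final metrics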
          if status_raw == "started":
              failures.append({"code": "METRICS_MISSING", "count": 1, "samples": []})
          status = "success"
          if failures:
              status = "partial"
          elif skipped_count > 0 and success_count == 0:
              status = "skipped"
          payload = {
              "schema_version": "v1",
              "workflow": os.getenv("WF_NAME"),
              "job": os.getenv("JOB_NAME"),
              "run_id": os.getenv("RUN_ID"),
              "repo": os.getenv("REPO_NAME"),
              "sha": os.getenv("SHA"),
              "finished_at": datetime.now(UTC).isoformat().replace("+00:00", "Z"),
              "metrics": {
                  "input_count": input_count,
                  "success_count": success_count,
                  "failed_count": failed_count,
                  "skipped_count": skipped_count,
                  "new_papers_count": int(m.get("new_papers_count", 0) or 0),
                  "recommended_count": int(m.get("recommended_count", 0) or 0),
                  "created_issues": int(m.get("created_issues", 0) or 0),
              },
              "failures_topn": failures[:5],
              "status": status,
          }
          Path("artifacts/observability/arxiv_monitor__monitor.json").write_text(
              json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
          )
          summary = [
              "## Observability (arxiv monitor)",
              "",
              "| metric | value |",
              "|---|---:|",
              f"| input_count | {payload['metrics']['input_count']} |",
              f"| new_papers_count | {payload['metrics']['new_papers_count']} |",
              f"| recommended_count | {payload['metrics']['recommended_count']} |",
              f"| created_issues | {payload['metrics']['created_issues']} |",
              f"| status | {payload['status']} |",
          ]
          with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as fh:
              fh.write("\n".join(summary) + "\n")
          PY
      - name: Upload observability artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: arxiv-monitor-observability-${{ github.run_id }}
          path: artifacts/observability/
          retention-days: 14