From 531f7e3e0e4ac3d22cfe2dd62010e8b23cb9dc8b Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 01:40:28 +0800 Subject: [PATCH 1/6] feat: add PaperReader v0.2 plugin - universal document recursive reader New plugin: Plugin/PaperReader - MinerU cloud API for high-fidelity PDF parsing (formulas/tables/images/OCR) - Auto-degradation to pdf-parse when MinerU unavailable - Section-aware chunking with Meta-Header injection - Rolling Context deep reading with auto-compression - Keyword-based query with section weighting - Supports academic papers, technical reports, books, legal documents, etc. --- Plugin/PaperReader/PaperReader.js | 174 +++++++++++++++ Plugin/PaperReader/README.md | 68 ++++++ Plugin/PaperReader/config.env.example | 25 +++ Plugin/PaperReader/lib/chunker.js | 183 ++++++++++++++++ Plugin/PaperReader/lib/deep-reader.js | 174 +++++++++++++++ Plugin/PaperReader/lib/ingest.js | 41 ++++ Plugin/PaperReader/lib/llm.js | 72 +++++++ Plugin/PaperReader/lib/mineru-client.js | 213 +++++++++++++++++++ Plugin/PaperReader/lib/pdf-parse-fallback.js | 35 +++ Plugin/PaperReader/lib/query.js | 108 ++++++++++ Plugin/PaperReader/lib/skeleton.js | 161 ++++++++++++++ Plugin/PaperReader/plugin-manifest.json | 75 +++++++ 12 files changed, 1329 insertions(+) create mode 100644 Plugin/PaperReader/PaperReader.js create mode 100644 Plugin/PaperReader/README.md create mode 100644 Plugin/PaperReader/config.env.example create mode 100644 Plugin/PaperReader/lib/chunker.js create mode 100644 Plugin/PaperReader/lib/deep-reader.js create mode 100644 Plugin/PaperReader/lib/ingest.js create mode 100644 Plugin/PaperReader/lib/llm.js create mode 100644 Plugin/PaperReader/lib/mineru-client.js create mode 100644 Plugin/PaperReader/lib/pdf-parse-fallback.js create mode 100644 Plugin/PaperReader/lib/query.js create mode 100644 Plugin/PaperReader/lib/skeleton.js create mode 100644 Plugin/PaperReader/plugin-manifest.json diff --git a/Plugin/PaperReader/PaperReader.js b/Plugin/PaperReader/PaperReader.js new file mode 100644 index 00000000..9b3b522d --- /dev/null +++ b/Plugin/PaperReader/PaperReader.js @@ -0,0 +1,174 @@ +/** + * PaperReader v0.2 — 主入口 + * + * stdin 接收 JSON → 路由到各 command handler → stdout 输出 JSON + */ + +const fs = require('fs').promises; +const fsSync = require('fs'); +const path = require('path'); +const crypto = require('crypto'); + +require('dotenv').config({ path: path.join(__dirname, 'config.env') }); +require('dotenv').config({ path: path.join(__dirname, '..', '..', 'config.env') }); + +const { ingestPdf } = require('./lib/ingest'); +const { chunkMarkdown } = require('./lib/chunker'); +const { generateSkeleton } = require('./lib/skeleton'); +const { readDeep } = require('./lib/deep-reader'); +const { queryPaper } = require('./lib/query'); + +const WORKSPACE_ROOT = path.join(__dirname, 'workspace'); + +function sendResponse(data) { + process.stdout.write(JSON.stringify(data)); + process.exit(0); +} + +function sha1(input) { + return crypto.createHash('sha1').update(input).digest('hex'); +} + +function getPaperWorkspace(paperId) { + return path.join(WORKSPACE_ROOT, paperId); +} + +async function writeJson(filePath, obj) { + await fs.writeFile(filePath, JSON.stringify(obj, null, 2), 'utf-8'); +} + +// ─── Command Handlers ─── + +async function handleIngestPDF({ filePath, paperId }) { + if (!filePath || typeof filePath !== 'string') { + throw new Error('IngestPDF requires filePath'); + } + + const abs = path.isAbsolute(filePath) ? 
filePath : path.resolve(process.cwd(), filePath); + if (!fsSync.existsSync(abs)) { + throw new Error(`PDF not found: ${abs}`); + } + + const resolvedPaperId = paperId && String(paperId).trim() + ? String(paperId).trim() + : `paper-${sha1(abs).slice(0, 10)}`; + + const wsDir = getPaperWorkspace(resolvedPaperId); + await fs.mkdir(wsDir, { recursive: true }); + + // L0: 解析 PDF → Markdown + Figures + const parsed = await ingestPdf(abs, { outputDir: wsDir }); + + // Save meta + const meta = { + paperId: resolvedPaperId, + sourceFilePath: abs, + extractedAt: new Date().toISOString(), + pageCount: parsed.pageCount, + textLength: (parsed.markdown || '').length, + engine: parsed.engine + }; + await writeJson(path.join(wsDir, 'meta.json'), meta); + + // Save full markdown + await fs.writeFile(path.join(wsDir, 'full_text.md'), parsed.markdown || '', 'utf-8'); + + // Save figure map + if (parsed.figureMap && parsed.figureMap.length > 0) { + await writeJson(path.join(wsDir, 'figure_map.json'), parsed.figureMap); + } + + // L1: 章节感知切分 + const chunks = chunkMarkdown(parsed.markdown || ''); + + // Save chunks + const chunksDir = path.join(wsDir, 'chunks'); + await fs.mkdir(chunksDir, { recursive: true }); + + for (const chunk of chunks) { + const chunkContent = chunk.metaHeader + ? `${chunk.metaHeader}\n\n---\n\n${chunk.text}` + : chunk.text; + await fs.writeFile( + path.join(chunksDir, `chunk_${chunk.index}.md`), + chunkContent, + 'utf-8' + ); + } + + // Save manifest + const manifest = { + chunkCount: chunks.length, + chunks: chunks.map(c => ({ + index: c.index, + section: c.section, + tokenCount: c.tokenCount + })) + }; + await writeJson(path.join(chunksDir, 'manifest.json'), manifest); + + // Create reading_notes dir + await fs.mkdir(path.join(wsDir, 'reading_notes'), { recursive: true }); + + return { + paperId: resolvedPaperId, + workspace: wsDir, + pageCount: meta.pageCount, + chunkCount: chunks.length, + engine: parsed.engine + }; +} + +async function handleReadSkeleton({ paperId, focus }) { + if (!paperId) throw new Error('ReadSkeleton requires paperId'); + const result = await generateSkeleton(paperId, { focus }); + return { paperId, globalMapPath: result.globalMapPath }; +} + +async function handleReadDeep({ paperId, goal }) { + if (!paperId) throw new Error('ReadDeep requires paperId'); + return await readDeep(paperId, { goal }); +} + +async function handleQuery({ paperId, question }) { + return await queryPaper(paperId, question); +} + +// ─── Main ─── + +async function main() { + let inputData = ''; + process.stdin.setEncoding('utf8'); + for await (const chunk of process.stdin) inputData += chunk; + + const request = JSON.parse(inputData || '{}'); + const command = request.command; + + try { + if (!command) throw new Error('Missing command'); + + let result; + switch (command) { + case 'IngestPDF': + result = await handleIngestPDF({ filePath: request.filePath, paperId: request.paperId }); + break; + case 'ReadSkeleton': + result = await handleReadSkeleton({ paperId: request.paperId, focus: request.focus }); + break; + case 'ReadDeep': + result = await handleReadDeep({ paperId: request.paperId, goal: request.goal }); + break; + case 'Query': + result = await handleQuery({ paperId: request.paperId, question: request.question }); + break; + default: + throw new Error(`Unknown command: ${command}`); + } + + sendResponse({ status: 'success', result }); + } catch (err) { + sendResponse({ status: 'error', error: err?.message || String(err) }); + } +} + +main(); diff --git 
a/Plugin/PaperReader/README.md b/Plugin/PaperReader/README.md
new file mode 100644
index 00000000..327290bd
--- /dev/null
+++ b/Plugin/PaperReader/README.md
@@ -0,0 +1,68 @@
+# PaperReader (v0.2)
+
+## Design Goals
+
+Turn very long PDFs / documents into a controllable, recursive reading workflow. Suitable for academic papers, technical reports, legal documents, book chapters, and other long-form documents.
+
+1. **L0 parsing layer**: high-fidelity parsing via the MinerU cloud API (preserves formulas/tables/images/multi-column layout), with automatic fallback to pdf-parse
+2. **L1 chunking layer**: section-aware chunking + Meta-Header injection + 10-20% overlap
+3. **L2 recursive logic layer**: Skeleton extraction / Rolling Context deep reading / merge & synthesis
+4. **L3 storage & interaction layer**: Obsidian-friendly Markdown directory structure
+
+## Commands
+
+| Command | Purpose |
+|------|------|
+| `IngestPDF` | PDF → Markdown → section-aware chunks |
+| `ReadSkeleton` | Generate a Global Map from the TOC/abstract/key sections |
+| `ReadDeep` | Recursive summarization with Rolling Context → Round-1 notes |
+| `Query` | Retrieval-style Q&A (keyword matching + section weighting) |
+
+## Artifact Layout
+
+```
+workspace/{paperId}/
+├── meta.json            # metadata (includes the parsing engine used)
+├── full_text.md         # full Markdown (L0 output)
+├── figure_map.json      # Figure_ID ↔ Caption mapping
+├── assets/
+│   └── figures/         # extracted images
+├── chunks/
+│   ├── manifest.json    # chunk manifest + section mapping
+│   └── chunk_{i}.md     # individual chunk (with Meta-Header)
+└── reading_notes/
+    ├── Global_Map.md          # skeleton map
+    ├── Chunk_Summaries.json   # per-chunk summaries
+    └── Round_1_Summary.md     # deep-reading notes
+```
+
+## Configuration
+
+Copy `config.env.example` to `config.env` and fill in:
+- `MINERU_API_TOKEN`: MinerU cloud API token (leave empty to fall back automatically)
+- `PaperReaderModel`: LLM model name
+- See `config.env.example` for the complete list of options
+
+## Dependencies
+
+- `axios`: HTTP requests
+- `pdf-parse`: PDF parsing in fallback mode
+- `adm-zip`: unpacking the zip archives returned by MinerU
+- `@dqbd/tiktoken`: token counting
+- `dotenv`: environment variables
+
+## Supported Document Types
+
+The MinerU cloud API can parse:
+- Academic papers (multi-column layouts, formulas, citations)
+- Technical reports / white papers
+- Book chapters
+- Legal documents / contracts
+- Scanned PDFs (built-in OCR)
+- Documents with complex tables
+
+## Known Limitations
+
+- MinerU free tier: 2000 pages per day; 200 MB / 600 pages per file
+- Rolling Context is capped at 4000 tokens and is compressed automatically beyond that
+- Query currently uses keyword matching (vector retrieval is planned for Phase 2)
diff --git a/Plugin/PaperReader/config.env.example b/Plugin/PaperReader/config.env.example
new file mode 100644
index 00000000..fbb51b8a
--- /dev/null
+++ b/Plugin/PaperReader/config.env.example
@@ -0,0 +1,25 @@
+# PaperReader 插件配置(示例)
+
+# === L0 解析层 ===
+# MinerU 云端 API Token(从 mineru.net 获取)
+MINERU_API_TOKEN=
+# 轮询超时(ms),默认 5 分钟
+MINERU_API_TIMEOUT=300000
+# 轮询间隔(ms),默认 5 秒
+MINERU_POLL_INTERVAL=5000
+
+# === L1 切分层 ===
+# 目标 chunk 大小(tokens)
+PaperReaderChunkSize=2000
+# chunk 重叠比例
+PaperReaderOverlap=0.15
+
+# === L2 递归逻辑层 ===
+# 读取/总结模型(使用 VCP 的 API_URL/API_Key 调用 /v1/chat/completions)
+PaperReaderModel=gemini-2.5-flash-search
+# 单次模型输出 token 上限
+PaperReaderMaxOutputTokens=12000
+# 分批并发组大小(每组处理的 chunk 数)
+PaperReaderBatchSize=4
+# deep 阅读最多处理多少个 chunk(防止成本失控)
+PaperReaderMaxChunks=120
diff --git a/Plugin/PaperReader/lib/chunker.js b/Plugin/PaperReader/lib/chunker.js
new file mode 100644
index 00000000..2b2f61c8
--- /dev/null
+++ b/Plugin/PaperReader/lib/chunker.js
@@ -0,0 +1,183 @@
+/**
+ * Section-aware chunker (T3)
+ *
+ * Splits on Markdown section headings (##); oversized sections are split again at paragraph boundaries.
+ * Each chunk gets an injected Meta-Header (section name + global-summary placeholder + overlap).
+ * Token counts use the tiktoken cl100k_base encoding.
+ */
+
+const { get_encoding } = require('@dqbd/tiktoken');
+
+const encoding = get_encoding('cl100k_base');
+
+const DEFAULT_TARGET_TOKENS = 2000;
+const DEFAULT_OVERLAP_RATIO = 0.15;
+const DEFAULT_MAX_CHUNKS = 120;
+
+/**
+ * Count the tokens in a piece of text
+ */
+function countTokens(text) {
+  if (!text) return 0;
+  return encoding.encode(text).length;
+}
+
+/**
+ * Extract the section structure from Markdown
+ * @returns {Array<{ level: number, title: string, content: string }>}
+ */
+function extractSections(markdown) {
+  const lines = markdown.split('\n');
+  const sections = [];
+  let currentSection = { level: 0, title: '(Preamble)', lines: [] };
+
+  for (const line of lines) {
+    const headerMatch = line.match(/^(#{1,4})\s+(.+)$/);
+    if (headerMatch) {
+      // Save previous section
+      if (currentSection.lines.length > 0 || currentSection.title !== '(Preamble)') {
+        sections.push({
+          level: currentSection.level,
+          title: currentSection.title,
+          content: currentSection.lines.join('\n')
+        });
+      }
+      currentSection = {
+        level: headerMatch[1].length,
+        title: headerMatch[2].trim(),
+        lines: [line]
+      };
+    } else {
+      currentSection.lines.push(line);
+    }
+  }
+
+  // Push last section
+  if (currentSection.lines.length > 0) {
+    sections.push({
+      level: currentSection.level,
+      title: currentSection.title,
+      content: currentSection.lines.join('\n')
+    });
+  }
+
+  return sections;
+}
+
+/**
+ * Split over-long text at paragraph boundaries
+ * @returns {string[]}
+ */
+function splitAtParagraphs(text, targetTokens) {
+  const paragraphs = text.split(/\n\n+/);
+  const pieces = [];
+  let current = '';
+  let currentTokens = 0;
+
+  for (const para of paragraphs) {
+    const paraTokens = countTokens(para);
+
+    if (currentTokens + paraTokens > targetTokens && current.trim()) {
+      pieces.push(current.trim());
+      current = '';
+      currentTokens = 0;
+    }
+
+    // Handle single paragraph exceeding limit
+    if (paraTokens > targetTokens && !current.trim()) {
+      const sentences = para.split(/(?<=[。?!.!?\n])/g);
+      for (const sent of sentences) {
+        const sentTokens = countTokens(sent);
+        if (currentTokens + sentTokens > targetTokens && current.trim()) {
+          pieces.push(current.trim());
+          current = '';
+          currentTokens = 0;
+        }
+        current += sent;
+        currentTokens += sentTokens;
+      }
+      continue;
+    }
+
+    current += (current ? '\n\n' : '') + para;
+    currentTokens += paraTokens;
+  }
+
+  if (current.trim()) {
+    pieces.push(current.trim());
+  }
+
+  return pieces;
+}
+
+/**
+ * Build the Meta-Header
+ */
+function makeMetaHeader(section, globalSummary, overlapText) {
+  const parts = [`[章节: ${section}]`];
+  if (globalSummary) {
+    parts.push(`[全局摘要: ${globalSummary}]`);
+  }
+  if (overlapText) {
+    parts.push(`[上文衔接: ...${overlapText.slice(-200)}]`);
+  }
+  return parts.join('\n');
+}
+
+/**
+ * Section-aware chunking
+ *
+ * @param {string} markdown - Markdown produced by the L0 layer
+ * @param {object} options - { targetTokens, overlapRatio, maxChunks, globalSummary }
+ * @returns {Array<{ index, section, tokenCount, text, metaHeader }>}
+ */
+function chunkMarkdown(markdown, options = {}) {
+  const targetTokens = options.targetTokens || DEFAULT_TARGET_TOKENS;
+  const overlapRatio = options.overlapRatio || DEFAULT_OVERLAP_RATIO;
+  const maxChunks = options.maxChunks || DEFAULT_MAX_CHUNKS;
+  const globalSummary = options.globalSummary || '';
+
+  if (!markdown || !markdown.trim()) return [];
+
+  const sections = extractSections(markdown);
+  const chunks = [];
+  let prevTail = '';
+
+  for (const section of sections) {
+    const sectionTokens = countTokens(section.content);
+
+    if (sectionTokens <= targetTokens) {
+      const metaHeader = makeMetaHeader(section.title, globalSummary, prevTail);
+      const text = section.content;
+      chunks.push({
+        index: chunks.length,
+        section: section.title,
+        tokenCount: countTokens(metaHeader + '\n\n' + text),
+        text,
+        metaHeader
+      });
+      const tailLen = Math.floor(text.length * overlapRatio);
+      prevTail = text.slice(-tailLen);
+    } else {
+      const pieces = splitAtParagraphs(section.content, targetTokens);
+      for (const piece of pieces) {
+        const metaHeader = makeMetaHeader(section.title, globalSummary, prevTail);
+        chunks.push({
+          index: chunks.length,
+          section: section.title,
+          tokenCount: countTokens(metaHeader + '\n\n' + piece),
+          text: piece,
+          metaHeader
+        });
+        const tailLen = Math.floor(piece.length * overlapRatio);
+        
prevTail = piece.slice(-tailLen); + } + } + + if (chunks.length >= maxChunks) break; + } + + return chunks.slice(0, maxChunks); +} + +module.exports = { chunkMarkdown, countTokens, extractSections }; diff --git a/Plugin/PaperReader/lib/deep-reader.js b/Plugin/PaperReader/lib/deep-reader.js new file mode 100644 index 00000000..05ab5890 --- /dev/null +++ b/Plugin/PaperReader/lib/deep-reader.js @@ -0,0 +1,174 @@ +/** + * Rolling Context Deep Reader (T6) + * + * 带滚动上下文的深度阅读:每个 chunk 摘要时携带前序累积的关键事实, + * 保持 chunk 间的连贯性。超出上限时自动压缩。 + */ + +const fs = require('fs').promises; +const fsSync = require('fs'); +const path = require('path'); +const { callLLM, callLLMJson } = require('./llm'); +const { countTokens } = require('./chunker'); + +const WORKSPACE_ROOT = path.join(__dirname, '..', 'workspace'); +const BATCH_SIZE = parseInt(process.env.PaperReaderBatchSize || '4', 10); +const MAX_CHUNKS = parseInt(process.env.PaperReaderMaxChunks || '120', 10); +const ROLLING_CONTEXT_MAX_TOKENS = 4000; + +/** + * 压缩 Rolling Context(当超过上限时) + */ +async function compressContext(rollingContext) { + const compressed = await callLLM([ + { role: 'system', content: '将以下累积的阅读笔记压缩为关键事实列表,保留最重要的信息、关键步骤和核心结论。删除冗余和过渡性描述。输出纯文本,不超过 2000 tokens。' }, + { role: 'user', content: rollingContext } + ], { max_tokens: 3000, temperature: 0.1 }); + return compressed; +} + +/** + * 对单个 chunk 做摘要(携带 Rolling Context) + */ +async function summarizeChunk(chunkText, { goal, globalMap, rollingContext, chunkIndex, section }) { + const system = [ + '你是一个"长文档分块摘要器",适用于各类文档(学术论文、技术报告、书籍、法律文书等)。', + '你会结合已有的阅读上下文,对当前 chunk 进行摘要。', + '输出 JSON(纯 JSON,不要代码块):', + '{"summary": string, "key_facts": string[], "methods": string[], "claims": string[], "open_questions": string[]}', + '其中 methods 字段可包含任何流程/步骤/操作方法(不限于科研实验),claims 包含文档中的核心论断/条款/规定。' + ].join('\n'); + + const userParts = [ + `主任务目标:${goal || '全面理解文档核心内容'}`, + `当前位置:第 ${chunkIndex} 块,章节「${section}」` + ]; + + if (rollingContext) { + userParts.push(`【已有阅读上下文】\n${rollingContext}`); + } + if (globalMap) { + userParts.push(`【全局地图摘要】\n${globalMap.slice(0, 2000)}`); + } + userParts.push(`【当前 chunk 内容】\n${chunkText}`); + + const result = await callLLMJson([ + { role: 'system', content: system }, + { role: 'user', content: userParts.join('\n\n') } + ], { temperature: 0.1 }); + + // Normalize result + return { + summary: result.summary || result.raw_response || '', + key_facts: result.key_facts || [], + methods: result.methods || [], + claims: result.claims || [], + open_questions: result.open_questions || [] + }; +} + +/** + * 带滚动上下文的深度阅读 + * + * @param {string} paperId + * @param {object} options - { goal, batchSize, maxChunks } + * @returns {Promise<{ summariesPath, roundPath }>} + */ +async function readDeep(paperId, options = {}) { + const wsDir = path.join(WORKSPACE_ROOT, paperId); + const chunksDir = path.join(wsDir, 'chunks'); + const manifestPath = path.join(chunksDir, 'manifest.json'); + + if (!fsSync.existsSync(manifestPath)) { + throw new Error(`chunks/manifest.json not found: ${manifestPath}`); + } + + const manifest = JSON.parse(await fs.readFile(manifestPath, 'utf-8')); + const chunks = manifest.chunks || []; + + // Load Global Map if exists + const globalMapPath = path.join(wsDir, 'reading_notes', 'Global_Map.md'); + const globalMap = fsSync.existsSync(globalMapPath) + ? 
await fs.readFile(globalMapPath, 'utf-8') + : ''; + + const batchSize = options.batchSize || BATCH_SIZE; + const maxChunks = Math.min(options.maxChunks || MAX_CHUNKS, chunks.length); + const goal = options.goal || ''; + + const limited = chunks.slice(0, maxChunks); + const summaries = []; + let rollingContext = ''; + + // Sequential processing with Rolling Context + for (let i = 0; i < limited.length; i += batchSize) { + const batch = limited.slice(i, i + batchSize); + + for (const chunk of batch) { + const chunkPath = path.join(chunksDir, `chunk_${chunk.index}.md`); + let chunkText; + if (fsSync.existsSync(chunkPath)) { + chunkText = await fs.readFile(chunkPath, 'utf-8'); + } else { + chunkText = chunk.text || ''; + } + + const summary = await summarizeChunk(chunkText, { + goal, + globalMap, + rollingContext, + chunkIndex: chunk.index, + section: chunk.section || 'unknown' + }); + + summaries.push({ + chunkIndex: chunk.index, + section: chunk.section, + ...summary + }); + + // Update Rolling Context + const newFacts = summary.key_facts.join('; '); + if (newFacts) { + rollingContext += `\n[Chunk ${chunk.index} - ${chunk.section}]: ${newFacts}`; + } + + // Compress if exceeding limit + if (countTokens(rollingContext) > ROLLING_CONTEXT_MAX_TOKENS) { + rollingContext = await compressContext(rollingContext); + } + } + } + + // Save chunk summaries + const notesDir = path.join(wsDir, 'reading_notes'); + await fs.mkdir(notesDir, { recursive: true }); + const summariesPath = path.join(notesDir, 'Chunk_Summaries.json'); + await fs.writeFile(summariesPath, JSON.stringify({ count: summaries.length, summaries }, null, 2), 'utf-8'); + + // Synthesis: merge all summaries into Round_1_Summary.md + const system = [ + '你是一个"长文档合并器",适用于各类文档。', + '输入是多段 chunk 的结构化摘要(含滚动上下文),请合并成一份结构化的深度笔记。', + '输出 Markdown,根据文档类型自适应包含:核心主题与结论、关键内容与论点、方法/流程/步骤(如有)、重要数据与证据、局限与风险、待解决问题清单。' + ].join('\n'); + + const user = [ + `主任务目标:${goal || '全面理解文档核心内容'}`, + globalMap ? 
`全局地图:\n${globalMap.slice(0, 3000)}` : '', + `最终累积上下文:\n${rollingContext}`, + `Chunk 摘要(${summaries.length} 个):\n${JSON.stringify(summaries).slice(0, 150000)}` + ].filter(Boolean).join('\n\n'); + + const merged = await callLLM([ + { role: 'system', content: system }, + { role: 'user', content: user } + ], { temperature: 0.2 }); + + const roundPath = path.join(notesDir, 'Round_1_Summary.md'); + await fs.writeFile(roundPath, merged || '', 'utf-8'); + + return { paperId, summariesPath, roundPath }; +} + +module.exports = { readDeep }; diff --git a/Plugin/PaperReader/lib/ingest.js b/Plugin/PaperReader/lib/ingest.js new file mode 100644 index 00000000..56374ad9 --- /dev/null +++ b/Plugin/PaperReader/lib/ingest.js @@ -0,0 +1,41 @@ +/** + * 统一解析入口 (T1+T2) + * + * 优先使用 MinerU 云端 API,失败则自动降级到 pdf-parse。 + */ + +const path = require('path'); +const fs = require('fs').promises; +const mineruClient = require('./mineru-client'); +const fallback = require('./pdf-parse-fallback'); + +/** + * 统一解析入口:优先 MinerU,失败则降级 + * + * @param {string} pdfPath - PDF 绝对路径 + * @param {object} options - { outputDir, token, timeout, pollInterval } + * @returns {Promise<{ markdown, figures, pageCount, figureMap, engine: 'mineru'|'pdf-parse' }>} + */ +async function ingestPdf(pdfPath, options = {}) { + const outputDir = options.outputDir || path.dirname(pdfPath); + const hasMineruToken = !!(options.token || process.env.MINERU_API_TOKEN); + + if (hasMineruToken) { + try { + const result = await mineruClient.parsePdf(pdfPath, { ...options, outputDir }); + return { ...result, engine: 'mineru' }; + } catch (err) { + // Log degradation warning, then fall through to pdf-parse + const errMsg = err instanceof mineruClient.MineruError + ? `[MinerU ${err.code}] ${err.message}` + : `[MinerU Error] ${err.message}`; + process.stderr.write(`[PaperReader] MinerU failed, degrading to pdf-parse: ${errMsg}\n`); + } + } + + // Fallback to pdf-parse + const result = await fallback.parsePdf(pdfPath); + return { ...result, engine: 'pdf-parse' }; +} + +module.exports = { ingestPdf }; diff --git a/Plugin/PaperReader/lib/llm.js b/Plugin/PaperReader/lib/llm.js new file mode 100644 index 00000000..abe09376 --- /dev/null +++ b/Plugin/PaperReader/lib/llm.js @@ -0,0 +1,72 @@ +/** + * LLM 调用封装 (T4) + * + * 从 PaperReader.js 抽出,统一管理模型调用。 + */ + +const axios = require('axios'); +const path = require('path'); + +require('dotenv').config({ path: path.join(__dirname, '..', 'config.env') }); +require('dotenv').config({ path: path.join(__dirname, '..', '..', '..', 'config.env') }); + +const API_KEY = process.env.API_Key; +const API_URL = process.env.API_URL; +const MODEL = process.env.PaperReaderModel; +const MAX_OUTPUT_TOKENS = parseInt(process.env.PaperReaderMaxOutputTokens || '12000', 10); + +function ensureConfig() { + if (!API_KEY || !API_URL) { + throw new Error('Missing API config: API_Key/API_URL are required (from repo root config.env).'); + } + if (!MODEL) { + throw new Error('Missing PaperReaderModel in config.env'); + } +} + +/** + * 调用 LLM (OpenAI-compatible API) + * + * @param {Array<{role: string, content: string}>} messages + * @param {object} options - { max_tokens, temperature } + * @returns {Promise} 模型输出文本 + */ +async function callLLM(messages, { max_tokens = MAX_OUTPUT_TOKENS, temperature = 0.2 } = {}) { + ensureConfig(); + + const payload = { + model: MODEL, + messages, + stream: false, + max_tokens, + temperature + }; + + const resp = await axios.post(API_URL, payload, { + headers: { Authorization: `Bearer ${API_KEY}`, 'Content-Type': 
'application/json' }, + timeout: 180000 + }); + + return resp?.data?.choices?.[0]?.message?.content || ''; +} + +/** + * 调用 LLM 并解析 JSON 响应 + * + * @param {Array} messages + * @param {object} options + * @returns {Promise} 解析后的 JSON 对象 + */ +async function callLLMJson(messages, options = {}) { + const raw = await callLLM(messages, { ...options, temperature: options.temperature ?? 0.1 }); + try { + // 尝试从 markdown 代码块中提取 JSON + const jsonMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/); + const jsonStr = jsonMatch ? jsonMatch[1].trim() : raw.trim(); + return JSON.parse(jsonStr); + } catch { + return { raw_response: raw }; + } +} + +module.exports = { callLLM, callLLMJson }; diff --git a/Plugin/PaperReader/lib/mineru-client.js b/Plugin/PaperReader/lib/mineru-client.js new file mode 100644 index 00000000..f0ceb760 --- /dev/null +++ b/Plugin/PaperReader/lib/mineru-client.js @@ -0,0 +1,213 @@ +/** + * MinerU Cloud API 适配器 (T1) + * + * 流程: 获取上传URL → PUT上传PDF → 轮询batch结果 → 下载zip → 提取md+figures + */ + +const fs = require('fs').promises; +const path = require('path'); +const axios = require('axios'); + +const MINERU_API_BASE = 'https://mineru.net/api/v4'; + +class MineruError extends Error { + constructor(code, message) { + super(message); + this.name = 'MineruError'; + this.code = code; + } +} + +/** + * 获取预签名上传URL + */ +async function getUploadUrl(token, fileName) { + const resp = await axios.post(`${MINERU_API_BASE}/file-urls/batch`, { + files: [{ name: fileName, data_id: `pr_${Date.now()}` }], + model_version: 'hybrid-auto-engine' + }, { + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${token}` + }, + timeout: 30000 + }); + + const data = resp.data; + if (data.code !== 0) { + throw new MineruError('MINERU_AUTH_FAILED', `MinerU API error: code=${data.code}, msg=${data.msg || ''}`); + } + + return { + uploadUrl: data.data.file_urls[0], + batchId: data.data.batch_id + }; +} + +/** + * PUT 上传文件到预签名URL + */ +async function uploadFile(uploadUrl, filePath) { + const fileBuffer = await fs.readFile(filePath); + await axios.put(uploadUrl, fileBuffer, { + headers: { 'Content-Type': 'application/octet-stream' }, + timeout: 120000, + maxContentLength: 200 * 1024 * 1024 + }); +} + +/** + * 轮询batch结果 + */ +async function pollBatchResult(token, batchId, { timeout = 300000, pollInterval = 5000 } = {}) { + const startTime = Date.now(); + const url = `${MINERU_API_BASE}/extract-results/batch/${batchId}`; + + while (Date.now() - startTime < timeout) { + const resp = await axios.get(url, { + headers: { 'Authorization': `Bearer ${token}` }, + timeout: 15000 + }); + + const data = resp.data; + if (data.code !== 0) { + throw new MineruError('MINERU_PARSE_FAILED', `Batch poll failed: code=${data.code}`); + } + + const results = data.data?.extract_result || []; + if (results.length > 0) { + const first = results[0]; + if (first.state === 'done') { + return first; + } + if (first.state === 'failed') { + throw new MineruError('MINERU_PARSE_FAILED', `Batch task failed: ${first.err_msg || 'unknown'}`); + } + } + + await new Promise(r => setTimeout(r, pollInterval)); + } + + throw new MineruError('MINERU_TIMEOUT', `Batch polling timeout after ${timeout}ms`); +} + +/** + * 下载并解压结果zip,提取markdown和图片 + */ +async function downloadAndExtract(zipUrl, outputDir) { + const AdmZip = require('adm-zip'); + + const resp = await axios.get(zipUrl, { + responseType: 'arraybuffer', + timeout: 120000 + }); + + const zip = new AdmZip(resp.data); + const entries = zip.getEntries(); + + let markdown = ''; + const 
figures = []; + + const figuresDir = path.join(outputDir, 'assets', 'figures'); + await fs.mkdir(figuresDir, { recursive: true }); + + for (const entry of entries) { + const entryName = entry.entryName; + + if (entryName.endsWith('.md') && !entry.isDirectory) { + markdown = entry.getData().toString('utf-8'); + } else if (/\.(png|jpg|jpeg|gif|svg|webp)$/i.test(entryName) && !entry.isDirectory) { + const figName = path.basename(entryName); + const figPath = path.join(figuresDir, figName); + await fs.writeFile(figPath, entry.getData()); + figures.push({ + id: figName.replace(/\.[^.]+$/, ''), + path: `assets/figures/${figName}`, + filename: figName + }); + } + } + + return { markdown, figures }; +} + +/** + * 从markdown中提取figure caption映射 + */ +function extractFigureCaptions(markdown) { + const captions = []; + // 匹配 ![caption](path) 模式 + const imgRegex = /!\[([^\]]*)\]\(([^)]+)\)/g; + let match; + while ((match = imgRegex.exec(markdown)) !== null) { + captions.push({ + caption: match[1], + originalPath: match[2], + id: path.basename(match[2]).replace(/\.[^.]+$/, '') + }); + } + + // 匹配 "Figure X." 或 "Fig. X:" 开头的段落 + const figTextRegex = /^(Fig(?:ure)?\.?\s*\d+[.:]\s*)(.+)$/gm; + while ((match = figTextRegex.exec(markdown)) !== null) { + captions.push({ + caption: match[2].trim(), + label: match[1].trim(), + id: `fig_text_${captions.length}` + }); + } + + return captions; +} + +/** + * 完整流程:上传 PDF → 提交解析 → 轮询 → 返回结果 + * + * @param {string} pdfPath - PDF 绝对路径 + * @param {object} options - { token, timeout, pollInterval, outputDir } + * @returns {Promise<{ markdown: string, figures: Array, pageCount: number, figureMap: Array }>} + */ +async function parsePdf(pdfPath, options = {}) { + const token = options.token || process.env.MINERU_API_TOKEN; + if (!token) { + throw new MineruError('MINERU_AUTH_FAILED', 'MINERU_API_TOKEN is required'); + } + + const timeout = options.timeout || parseInt(process.env.MINERU_API_TIMEOUT || '300000', 10); + const pollInterval = options.pollInterval || parseInt(process.env.MINERU_POLL_INTERVAL || '5000', 10); + + const fileName = path.basename(pdfPath); + const outputDir = options.outputDir || path.dirname(pdfPath); + + // Step 1: 获取上传URL + const { uploadUrl, batchId } = await getUploadUrl(token, fileName); + + // Step 2: 上传文件 + await uploadFile(uploadUrl, pdfPath); + + // Step 3: 轮询batch结果 (file-urls/batch 自动创建解析任务) + const batchResult = await pollBatchResult(token, batchId, { timeout, pollInterval }); + + // Step 4: 下载并解压结果 + const zipUrl = batchResult.full_zip_url; + if (!zipUrl) { + throw new MineruError('MINERU_PARSE_FAILED', 'No zip URL in result'); + } + + const { markdown, figures } = await downloadAndExtract(zipUrl, outputDir); + + // Step 5: 提取figure captions + const figureMap = extractFigureCaptions(markdown); + + return { + markdown, + figures, + pageCount: batchResult.page_count || null, + figureMap + }; +} + +module.exports = { + parsePdf, + MineruError +}; diff --git a/Plugin/PaperReader/lib/pdf-parse-fallback.js b/Plugin/PaperReader/lib/pdf-parse-fallback.js new file mode 100644 index 00000000..3edb96ab --- /dev/null +++ b/Plugin/PaperReader/lib/pdf-parse-fallback.js @@ -0,0 +1,35 @@ +/** + * pdf-parse 降级回退封装 (T2) + * + * 当 MinerU API 不可用时,回退到本地 pdf-parse 纯文本抽取。 + * 输出格式与 mineru-client.js 对齐,但 figures 为空,markdown 为纯文本。 + */ + +const fs = require('fs').promises; +const pdfParse = require('pdf-parse'); + +/** + * 使用 pdf-parse 做纯文本抽取(降级模式) + * + * @param {string} pdfPath - PDF 绝对路径 + * @returns {Promise<{ markdown: string, figures: [], pageCount: 
number, figureMap: [], degraded: true }>} + */ +async function parsePdf(pdfPath) { + const buffer = await fs.readFile(pdfPath); + const parsed = await pdfParse(buffer); + + const rawText = parsed.text || ''; + const markdown = rawText + .replace(/\r\n/g, '\n') + .replace(/\n{3,}/g, '\n\n'); + + return { + markdown, + figures: [], + pageCount: parsed.numpages || null, + figureMap: [], + degraded: true + }; +} + +module.exports = { parsePdf }; diff --git a/Plugin/PaperReader/lib/query.js b/Plugin/PaperReader/lib/query.js new file mode 100644 index 00000000..16b21762 --- /dev/null +++ b/Plugin/PaperReader/lib/query.js @@ -0,0 +1,108 @@ +/** + * Query 问答模块 (T7) + * + * Phase 1: 关键词匹配挑选相关 chunk + LLM 问答 + * Phase 2: 升级为向量检索 + */ + +const fs = require('fs').promises; +const fsSync = require('fs'); +const path = require('path'); +const { callLLM } = require('./llm'); + +const WORKSPACE_ROOT = path.join(__dirname, '..', 'workspace'); + +/** + * 关键词匹配挑选相关 chunk + */ +function keywordPick(chunks, question, topK = 6) { + const q = String(question || '').toLowerCase().trim(); + if (!q) return chunks.slice(0, topK); + + const words = q.split(/[\s,;,;。?!?!]+/).filter(w => w.length >= 2).slice(0, 15); + + const scored = chunks.map(c => { + const text = (c.text || '').toLowerCase(); + const section = (c.section || '').toLowerCase(); + let score = 0; + for (const w of words) { + if (text.includes(w)) score += 1; + if (section.includes(w)) score += 2; + } + return { chunk: c, score }; + }); + + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, topK).filter(s => s.score > 0).map(s => s.chunk); +} + +/** + * 对已导入的文档做检索式问答 + * + * @param {string} paperId + * @param {string} question + * @returns {Promise<{ paperId, answer, sources: Array }>} + */ +async function queryPaper(paperId, question) { + if (!paperId) throw new Error('Query requires paperId'); + if (!question) throw new Error('Query requires question'); + + const wsDir = path.join(WORKSPACE_ROOT, paperId); + const manifestPath = path.join(wsDir, 'chunks', 'manifest.json'); + + if (!fsSync.existsSync(manifestPath)) { + throw new Error(`chunks/manifest.json not found: ${manifestPath}`); + } + + const manifest = JSON.parse(await fs.readFile(manifestPath, 'utf-8')); + const chunks = manifest.chunks || []; + + // Load Global Map if exists + const globalMapPath = path.join(wsDir, 'reading_notes', 'Global_Map.md'); + const globalMap = fsSync.existsSync(globalMapPath) + ? await fs.readFile(globalMapPath, 'utf-8') + : ''; + + // Pick relevant chunks + const picked = keywordPick(chunks, question, 6); + const contextChunks = picked.length > 0 ? picked : chunks.slice(0, 4); + + // Read chunk files for full content + const contextParts = []; + for (const c of contextChunks) { + const chunkPath = path.join(wsDir, 'chunks', `chunk_${c.index}.md`); + let text; + if (fsSync.existsSync(chunkPath)) { + text = await fs.readFile(chunkPath, 'utf-8'); + } else { + text = c.text || ''; + } + contextParts.push(`---\n[chunk ${c.index} | 章节: ${c.section || 'unknown'}]\n${text}`); + } + const context = contextParts.join('\n\n'); + + const system = [ + '你是一个"文档问答助手",适用于各类长文档(学术论文、技术报告、书籍、法律文书等)。', + '只根据提供的上下文回答;若上下文不足,明确说"证据不足",并给出下一步需要检索的章节/关键词。', + '输出:先给结论,再给证据引用(标注 chunk index 和章节名)。' + ].join('\n'); + + const user = [ + globalMap ? 
`全局地图:\n${globalMap.slice(0, 2000)}` : '', + `问题:${question}`, + `上下文:\n${context}` + ].filter(Boolean).join('\n\n'); + + const answer = await callLLM([ + { role: 'system', content: system }, + { role: 'user', content: user } + ], { temperature: 0.2 }); + + return { + paperId, + answer, + sources: contextChunks.map(c => ({ index: c.index, section: c.section })) + }; +} + +module.exports = { queryPaper }; diff --git a/Plugin/PaperReader/lib/skeleton.js b/Plugin/PaperReader/lib/skeleton.js new file mode 100644 index 00000000..dd200cf3 --- /dev/null +++ b/Plugin/PaperReader/lib/skeleton.js @@ -0,0 +1,161 @@ +/** + * Skeleton 骨架提取重构 (T5) + * + * 从 Markdown 结构提取目录树、Abstract、Conclusion、Figure Caption, + * 生成 Global Map。不再只读首尾2块。 + */ + +const fs = require('fs').promises; +const fsSync = require('fs'); +const path = require('path'); +const { callLLM } = require('./llm'); +const { extractSections } = require('./chunker'); + +const WORKSPACE_ROOT = path.join(__dirname, '..', 'workspace'); + +/** + * 从 Markdown 提取目录树(标题列表) + */ +function extractTOC(markdown) { + const lines = markdown.split('\n'); + const toc = []; + for (const line of lines) { + const match = line.match(/^(#{1,4})\s+(.+)$/); + if (match) { + toc.push({ + level: match[1].length, + title: match[2].trim(), + indent: ' '.repeat(match[1].length - 1) + }); + } + } + return toc; +} + +/** + * 提取关键章节全文 + */ +function extractKeySections(sections) { + const keyPatterns = [ + /abstract/i, + /introduction/i, + /conclusion/i, + /discussion/i, + /summary/i, + /overview/i, + /background/i, + /preface/i, + /executive.?summary/i, + /摘要/, + /引言/, + /结论/, + /讨论/, + /概述/, + /背景/, + /前言/, + /总结/ + ]; + + const found = []; + for (const section of sections) { + for (const pattern of keyPatterns) { + if (pattern.test(section.title)) { + found.push(section); + break; + } + } + } + return found; +} + +/** + * 从 figure_map.json 加载 Figure Captions + */ +async function loadFigureCaptions(wsDir) { + const figMapPath = path.join(wsDir, 'figure_map.json'); + if (!fsSync.existsSync(figMapPath)) return []; + const raw = await fs.readFile(figMapPath, 'utf-8'); + try { + return JSON.parse(raw); + } catch { + return []; + } +} + +/** + * 从 Markdown 结构提取骨架并生成 Global Map + * + * @param {string} paperId + * @param {object} options - { focus } + * @returns {Promise<{ globalMapPath: string, globalMapContent: string }>} + */ +async function generateSkeleton(paperId, options = {}) { + const wsDir = path.join(WORKSPACE_ROOT, paperId); + const mdPath = path.join(wsDir, 'full_text.md'); + const metaPath = path.join(wsDir, 'meta.json'); + + if (!fsSync.existsSync(mdPath)) { + throw new Error(`full_text.md not found: ${mdPath}`); + } + + const markdown = await fs.readFile(mdPath, 'utf-8'); + const meta = fsSync.existsSync(metaPath) + ? JSON.parse(await fs.readFile(metaPath, 'utf-8')) + : {}; + + // 1. 提取目录树 + const toc = extractTOC(markdown); + const tocText = toc.map(t => `${t.indent}- ${t.title}`).join('\n'); + + // 2. 提取关键章节 + const sections = extractSections(markdown); + const keySections = extractKeySections(sections); + const keyText = keySections + .map(s => `### ${s.title}\n${s.content.slice(0, 3000)}`) + .join('\n\n'); + + // 3. 加载 Figure Captions + const figureCaptions = await loadFigureCaptions(wsDir); + const captionText = figureCaptions.length > 0 + ? figureCaptions.map(f => `- ${f.label || f.id}: ${f.caption}`).join('\n') + : '(无图注信息)'; + + // 4. 
构建 LLM prompt + const system = [ + '你是一个"文档骨架提取器",适用于各类长文档(学术论文、技术报告、书籍章节、法律文书等)。', + '目标:基于目录结构、关键章节和图注,提取文档的全局地图(Global Map)。', + '输出 Markdown,根据文档类型自适应包含以下要素:', + '1. 核心主题(1-2句话概括本文档的核心内容)', + '2. 核心问题/目的(本文档要解决什么问题或传达什么信息)', + '3. 关键内容概要(主要论点、方法、流程、条款等——依文档类型而定)', + '4. 结构路线图(文档的组织逻辑和各部分之间的关系)', + '5. 主要结论/要点', + '6. 局限性/注意事项/风险点', + '7. 各章节阅读优先级标签(High/Medium/Low)', + '8. 后续深读建议(重点关注哪些章节/图表/附录)', + '引用原文短句时标注来自哪个章节。' + ].join('\n'); + + const user = [ + `阅读焦点:${options.focus || '通用理解(全面掌握文档核心内容与结构)'}`, + `元信息:页数=${meta.pageCount ?? 'unknown'}`, + `\n【目录结构】\n${tocText}`, + `\n【关键章节内容】\n${keyText.slice(0, 15000)}`, + `\n【图注列表】\n${captionText}` + ].join('\n\n'); + + const content = await callLLM([ + { role: 'system', content: system }, + { role: 'user', content: user } + ]); + + // 5. 写入 Global_Map.md + const notesDir = path.join(wsDir, 'reading_notes'); + await fs.mkdir(notesDir, { recursive: true }); + const outPath = path.join(notesDir, 'Global_Map.md'); + await fs.writeFile(outPath, content || '', 'utf-8'); + + return { globalMapPath: outPath, globalMapContent: content }; +} + +module.exports = { generateSkeleton, extractTOC, extractKeySections }; diff --git a/Plugin/PaperReader/plugin-manifest.json b/Plugin/PaperReader/plugin-manifest.json new file mode 100644 index 00000000..580fa8e6 --- /dev/null +++ b/Plugin/PaperReader/plugin-manifest.json @@ -0,0 +1,75 @@ +{ + "manifestVersion": "1.0.0", + "name": "PaperReader", + "version": "0.2.0", + "displayName": "超文本递归阅读器", + "description": "将超长 PDF/文档转为可递归阅读的分块工件(Skeleton/Deep/Query)。适用于学术论文、技术报告、书籍章节、法律文书等各类长文档。v0.2: MinerU 云端高保真解析(公式/表格/图片/多栏/扫描OCR)+ 章节感知切分 + Rolling Context 深度阅读。MinerU 不可用时自动降级到 pdf-parse。", + "author": "VCP", + "pluginType": "synchronous", + "entryPoint": { + "type": "nodejs", + "command": "node PaperReader.js" + }, + "communication": { + "protocol": "stdio", + "timeout": 600000 + }, + "configSchema": { + "MINERU_API_TOKEN": { + "type": "string", + "description": "MinerU 云端 API Token(从 mineru.net 获取)。不填则自动降级到 pdf-parse。" + }, + "MINERU_API_TIMEOUT": { + "type": "integer", + "description": "MinerU 轮询超时(ms),默认 300000(5分钟)。" + }, + "MINERU_POLL_INTERVAL": { + "type": "integer", + "description": "MinerU 轮询间隔(ms),默认 5000。" + }, + "PaperReaderChunkSize": { + "type": "integer", + "description": "目标 chunk 大小(tokens),默认 2000。" + }, + "PaperReaderOverlap": { + "type": "number", + "description": "chunk 重叠比例,默认 0.15。" + }, + "PaperReaderModel": { + "type": "string", + "description": "用于阅读/总结的模型名称。" + }, + "PaperReaderMaxOutputTokens": { + "type": "integer", + "description": "单次模型输出上限,默认 12000。" + }, + "PaperReaderBatchSize": { + "type": "integer", + "description": "Deep 阅读分批处理的 chunk 数,默认 4。" + }, + "PaperReaderMaxChunks": { + "type": "integer", + "description": "单次 deep 阅读最多处理的 chunk 数(防止成本失控),默认 120。" + } + }, + "capabilities": { + "invocationCommands": [ + { + "commandIdentifier": "IngestPDF", + "description": "解析 PDF 并生成可递归阅读的分块工件。支持学术论文、技术报告、书籍、法律文书等各类 PDF。使用 MinerU 云端 API 高保真解析(保留公式/表格/图片/多栏排版),失败自动降级到 pdf-parse。输出章节感知的 chunk 文件。\n参数:\n- command: 固定为 IngestPDF\n- filePath (字符串, 必需): PDF 绝对路径\n- paperId (字符串, 可选): 自定义 ID(不传则自动生成)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」IngestPDF「末」,\nfilePath:「始」D:\\\\books\\\\paper.pdf「末」\n<<<[END_TOOL_REQUEST]>>>" + }, + { + "commandIdentifier": "ReadSkeleton", + "description": "基于已导入的文档工件生成骨架地图(Global Map)。从 Markdown 结构提取目录树、关键章节和图注,生成带阅读优先级标签的全局地图。适用于任何已 Ingest 的文档。\n参数:\n- command: 固定为 ReadSkeleton\n- paperId (字符串, 必需)\n- focus (字符串, 可选): 
本次阅读关注点\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadSkeleton「末」,\npaperId:「始」paper-xxx「末」,\nfocus:「始」方法学与实验设计「末」\n<<<[END_TOOL_REQUEST]>>>" + }, + { + "commandIdentifier": "ReadDeep", + "description": "带 Rolling Context 的深度阅读:逐 chunk 摘要时携带前序累积的关键事实,保持 chunk 间连贯性。超出上限自动压缩。最终合并为 Round-1 深度笔记。\n参数:\n- command: 固定为 ReadDeep\n- paperId (字符串, 必需)\n- goal (字符串, 可选): 主任务目标(用于决定摘要粒度)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadDeep「末」,\npaperId:「始」paper-xxx「末」,\ngoal:「始」快速理解核心贡献与可复现实验步骤「末」\n<<<[END_TOOL_REQUEST]>>>" + }, + { + "commandIdentifier": "Query", + "description": "对已导入的文档做检索式问答(关键词匹配 + 章节权重挑选相关 chunk)。返回答案及引用来源。\n参数:\n- command: 固定为 Query\n- paperId (字符串, 必需)\n- question (字符串, 必需)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」Query「末」,\npaperId:「始」paper-xxx「末」,\nquestion:「始」这份文档的核心结论是什么?「末」\n<<<[END_TOOL_REQUEST]>>>" + } + ] + } +} From 9fdeda2ee2823fbe836403a73a45088ee2ad44c9 Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 02:51:04 +0800 Subject: [PATCH 2/6] fix: correct MinerU API model_version and upload headers per official docs - model_version: use 'pipeline' (default) or 'vlm', not 'hybrid-auto-engine' - Remove Content-Type header from PUT upload (per MinerU docs) - Add MINERU_MODEL_VERSION config option - Clarify config.env.example: Bearer Token, not Access Key/Secret Key --- Plugin/PaperReader/config.env.example | 6 +++++- Plugin/PaperReader/lib/mineru-client.js | 13 ++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Plugin/PaperReader/config.env.example b/Plugin/PaperReader/config.env.example index fbb51b8a..92ffb42f 100644 --- a/Plugin/PaperReader/config.env.example +++ b/Plugin/PaperReader/config.env.example @@ -1,8 +1,12 @@ # PaperReader 插件配置(示例) # === L0 解析层 === -# MinerU 云端 API Token(从 mineru.net 获取) +# MinerU 云端 API Token(从 mineru.net 网站的「个人中心 → API密钥管理」获取) +# 注意:这里填的是 Bearer Token(一串长字符串),不是 Access Key / Secret Key +# 不填则自动降级到 pdf-parse 纯文本模式 MINERU_API_TOKEN= +# MinerU 模型版本:pipeline(默认,速度快)或 vlm(效果更好,速度较慢) +MINERU_MODEL_VERSION=pipeline # 轮询超时(ms),默认 5 分钟 MINERU_API_TIMEOUT=300000 # 轮询间隔(ms),默认 5 秒 diff --git a/Plugin/PaperReader/lib/mineru-client.js b/Plugin/PaperReader/lib/mineru-client.js index f0ceb760..f197a44b 100644 --- a/Plugin/PaperReader/lib/mineru-client.js +++ b/Plugin/PaperReader/lib/mineru-client.js @@ -21,10 +21,12 @@ class MineruError extends Error { /** * 获取预签名上传URL */ -async function getUploadUrl(token, fileName) { +async function getUploadUrl(token, fileName, modelVersion) { const resp = await axios.post(`${MINERU_API_BASE}/file-urls/batch`, { files: [{ name: fileName, data_id: `pr_${Date.now()}` }], - model_version: 'hybrid-auto-engine' + enable_formula: true, + enable_table: true, + model_version: modelVersion }, { headers: { 'Content-Type': 'application/json', @@ -49,8 +51,8 @@ async function getUploadUrl(token, fileName) { */ async function uploadFile(uploadUrl, filePath) { const fileBuffer = await fs.readFile(filePath); + // MinerU 文档明确说明:上传文件时无须设置 Content-Type 请求头 await axios.put(uploadUrl, fileBuffer, { - headers: { 'Content-Type': 'application/octet-stream' }, timeout: 120000, maxContentLength: 200 * 1024 * 1024 }); @@ -164,7 +166,7 @@ function extractFigureCaptions(markdown) { * 完整流程:上传 PDF → 提交解析 → 轮询 → 返回结果 * * @param {string} pdfPath - PDF 绝对路径 - * @param {object} options - { token, timeout, pollInterval, outputDir } + * @param {object} options - { token, timeout, pollInterval, outputDir, modelVersion } * @returns 
{Promise<{ markdown: string, figures: Array, pageCount: number, figureMap: Array }>} */ async function parsePdf(pdfPath, options = {}) { @@ -175,12 +177,13 @@ async function parsePdf(pdfPath, options = {}) { const timeout = options.timeout || parseInt(process.env.MINERU_API_TIMEOUT || '300000', 10); const pollInterval = options.pollInterval || parseInt(process.env.MINERU_POLL_INTERVAL || '5000', 10); + const modelVersion = options.modelVersion || process.env.MINERU_MODEL_VERSION || 'pipeline'; const fileName = path.basename(pdfPath); const outputDir = options.outputDir || path.dirname(pdfPath); // Step 1: 获取上传URL - const { uploadUrl, batchId } = await getUploadUrl(token, fileName); + const { uploadUrl, batchId } = await getUploadUrl(token, fileName, modelVersion); // Step 2: 上传文件 await uploadFile(uploadUrl, pdfPath); From 499bd1a3ade7c50ed9a8af91ad0c8293b6e0dc18 Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 03:53:26 +0800 Subject: [PATCH 3/6] fix: MinerU OSS upload 403 + pdf-parse v2 API compatibility - mineru-client.js: replace axios.put with native https.request for OSS presigned URL upload (axios auto-adds headers that break signature) - pdf-parse-fallback.js: rewrite for pdf-parse v2 API (PDFParse class + Uint8Array + getText/destroy) --- Plugin/PaperReader/lib/mineru-client.js | 35 ++++++++++++++++++-- Plugin/PaperReader/lib/pdf-parse-fallback.js | 28 ++++++++++++---- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/Plugin/PaperReader/lib/mineru-client.js b/Plugin/PaperReader/lib/mineru-client.js index f197a44b..56ba0183 100644 --- a/Plugin/PaperReader/lib/mineru-client.js +++ b/Plugin/PaperReader/lib/mineru-client.js @@ -52,9 +52,38 @@ async function getUploadUrl(token, fileName, modelVersion) { async function uploadFile(uploadUrl, filePath) { const fileBuffer = await fs.readFile(filePath); // MinerU 文档明确说明:上传文件时无须设置 Content-Type 请求头 - await axios.put(uploadUrl, fileBuffer, { - timeout: 120000, - maxContentLength: 200 * 1024 * 1024 + // axios 会自动添加 Content-Type/Accept 等头部,导致 OSS 预签名 URL 签名校验失败 + // 改用 Node 原生 https 模块,只发送 Content-Length,完全匹配 Python requests.put(url, data=f) 的行为 + const { URL } = require('url'); + const https = require('https'); + const parsedUrl = new URL(uploadUrl); + + await new Promise((resolve, reject) => { + const req = https.request({ + hostname: parsedUrl.hostname, + port: parsedUrl.port || 443, + path: parsedUrl.pathname + parsedUrl.search, + method: 'PUT', + headers: { + 'Content-Length': fileBuffer.length + }, + timeout: 120000 + }, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + if (res.statusCode >= 200 && res.statusCode < 300) { + resolve(); + } else { + reject(new MineruError('MINERU_UPLOAD_FAILED', + `Upload failed: HTTP ${res.statusCode} - ${body.slice(0, 200)}`)); + } + }); + }); + req.on('error', reject); + req.on('timeout', () => { req.destroy(); reject(new MineruError('MINERU_UPLOAD_FAILED', 'Upload timeout')); }); + req.write(fileBuffer); + req.end(); }); } diff --git a/Plugin/PaperReader/lib/pdf-parse-fallback.js b/Plugin/PaperReader/lib/pdf-parse-fallback.js index 3edb96ab..006f96b7 100644 --- a/Plugin/PaperReader/lib/pdf-parse-fallback.js +++ b/Plugin/PaperReader/lib/pdf-parse-fallback.js @@ -1,24 +1,40 @@ /** * pdf-parse 降级回退封装 (T2) - * + * * 当 MinerU API 不可用时,回退到本地 pdf-parse 纯文本抽取。 * 输出格式与 mineru-client.js 对齐,但 figures 为空,markdown 为纯文本。 + * + * pdf-parse v2 API: new PDFParse({ data: Uint8Array }) → getText() → destroy() */ const fs = require('fs').promises; -const 
pdfParse = require('pdf-parse'); +const { PDFParse } = require('pdf-parse'); /** * 使用 pdf-parse 做纯文本抽取(降级模式) - * + * * @param {string} pdfPath - PDF 绝对路径 * @returns {Promise<{ markdown: string, figures: [], pageCount: number, figureMap: [], degraded: true }>} */ async function parsePdf(pdfPath) { const buffer = await fs.readFile(pdfPath); - const parsed = await pdfParse(buffer); + // pdf-parse v2 要求 Uint8Array 而非 Buffer + const uint8 = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength); + + const parser = new PDFParse({ data: uint8 }); + let pageCount = null; + let rawText = ''; + + try { + const info = await parser.getInfo(); + pageCount = info.total || null; + + const textResult = await parser.getText(); + rawText = textResult.text || ''; + } finally { + await parser.destroy(); + } - const rawText = parsed.text || ''; const markdown = rawText .replace(/\r\n/g, '\n') .replace(/\n{3,}/g, '\n\n'); @@ -26,7 +42,7 @@ async function parsePdf(pdfPath) { return { markdown, figures: [], - pageCount: parsed.numpages || null, + pageCount, figureMap: [], degraded: true }; From 3614ecdcc49341c96a8f90d3e437938efab1f2c3 Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 04:28:24 +0800 Subject: [PATCH 4/6] fix: return content to model + 429 retry with exponential backoff - PaperReader.js: ReadSkeleton/ReadDeep now return actual text content - llm.js: add 429 exponential backoff retry (5 attempts, 3s/6s/12s/24s) - deep-reader.js: add 1.5s inter-chunk delay to prevent rate limiting --- Plugin/PaperReader/PaperReader.js | 9 +++++++-- Plugin/PaperReader/lib/deep-reader.js | 9 +++++++++ Plugin/PaperReader/lib/llm.js | 26 ++++++++++++++++++++------ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/Plugin/PaperReader/PaperReader.js b/Plugin/PaperReader/PaperReader.js index 9b3b522d..6335fa21 100644 --- a/Plugin/PaperReader/PaperReader.js +++ b/Plugin/PaperReader/PaperReader.js @@ -122,12 +122,17 @@ async function handleIngestPDF({ filePath, paperId }) { async function handleReadSkeleton({ paperId, focus }) { if (!paperId) throw new Error('ReadSkeleton requires paperId'); const result = await generateSkeleton(paperId, { focus }); - return { paperId, globalMapPath: result.globalMapPath }; + return { paperId, globalMapPath: result.globalMapPath, content: result.globalMapContent }; } async function handleReadDeep({ paperId, goal }) { if (!paperId) throw new Error('ReadDeep requires paperId'); - return await readDeep(paperId, { goal }); + const result = await readDeep(paperId, { goal }); + // Read the Round_1_Summary.md to return its content + const summaryContent = fsSync.existsSync(result.roundPath) + ? 
(await fs.readFile(result.roundPath, 'utf-8')) + : ''; + return { ...result, content: summaryContent }; } async function handleQuery({ paperId, question }) { diff --git a/Plugin/PaperReader/lib/deep-reader.js b/Plugin/PaperReader/lib/deep-reader.js index 05ab5890..ebd3fbde 100644 --- a/Plugin/PaperReader/lib/deep-reader.js +++ b/Plugin/PaperReader/lib/deep-reader.js @@ -15,6 +15,7 @@ const WORKSPACE_ROOT = path.join(__dirname, '..', 'workspace'); const BATCH_SIZE = parseInt(process.env.PaperReaderBatchSize || '4', 10); const MAX_CHUNKS = parseInt(process.env.PaperReaderMaxChunks || '120', 10); const ROLLING_CONTEXT_MAX_TOKENS = 4000; +const CHUNK_DELAY_MS = parseInt(process.env.PaperReaderChunkDelay || '1500', 10); /** * 压缩 Rolling Context(当超过上限时) @@ -101,10 +102,13 @@ async function readDeep(paperId, options = {}) { let rollingContext = ''; // Sequential processing with Rolling Context + // Process in small batches but maintain rolling context between batches for (let i = 0; i < limited.length; i += batchSize) { const batch = limited.slice(i, i + batchSize); + // Within a batch, process sequentially to maintain rolling context for (const chunk of batch) { + // Read chunk content const chunkPath = path.join(chunksDir, `chunk_${chunk.index}.md`); let chunkText; if (fsSync.existsSync(chunkPath)) { @@ -113,6 +117,11 @@ async function readDeep(paperId, options = {}) { chunkText = chunk.text || ''; } + // Delay between LLM calls to avoid 429 rate limiting (skip first chunk) + if (summaries.length > 0) { + await new Promise(r => setTimeout(r, CHUNK_DELAY_MS)); + } + const summary = await summarizeChunk(chunkText, { goal, globalMap, diff --git a/Plugin/PaperReader/lib/llm.js b/Plugin/PaperReader/lib/llm.js index abe09376..e253ed5b 100644 --- a/Plugin/PaperReader/lib/llm.js +++ b/Plugin/PaperReader/lib/llm.js @@ -42,12 +42,26 @@ async function callLLM(messages, { max_tokens = MAX_OUTPUT_TOKENS, temperature = temperature }; - const resp = await axios.post(API_URL, payload, { - headers: { Authorization: `Bearer ${API_KEY}`, 'Content-Type': 'application/json' }, - timeout: 180000 - }); - - return resp?.data?.choices?.[0]?.message?.content || ''; + const maxRetries = 5; + for (let attempt = 0; attempt < maxRetries; attempt++) { + try { + const resp = await axios.post(API_URL, payload, { + headers: { Authorization: `Bearer ${API_KEY}`, 'Content-Type': 'application/json' }, + timeout: 180000 + }); + return resp?.data?.choices?.[0]?.message?.content || ''; + } catch (err) { + const status = err?.response?.status; + if (status === 429 && attempt < maxRetries - 1) { + // Exponential backoff: 3s, 6s, 12s, 24s + const delay = 3000 * Math.pow(2, attempt); + process.stderr.write(`[PaperReader] 429 rate limit, retrying in ${delay / 1000}s (attempt ${attempt + 1}/${maxRetries})\n`); + await new Promise(r => setTimeout(r, delay)); + continue; + } + throw err; + } + } } /** From dce4176a850f648f6981fc4e62f775db96bb747b Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 06:11:11 +0800 Subject: [PATCH 5/6] fix(PaperReader): fix LLM URL resolution, add concurrency & caching - lib/llm.js: auto-resolve localhost URL to correct VCP port + path; add error classifier (rate_limit/auth/timeout/network); add traceTag logging - lib/deep-reader.js: batch-internal concurrency via Promise.all; full cache (skip if Round_1_Summary.md exists); incremental cache (skip already-summarized chunks) - PaperReader.js: IngestPDF cache (skip if manifest+meta exist); route logging; passthrough forceReparse/forceReread params --- 
Plugin/PaperReader/PaperReader.js | 48 ++++++++++-- Plugin/PaperReader/lib/deep-reader.js | 102 ++++++++++++++++++++------ Plugin/PaperReader/lib/llm.js | 99 ++++++++++++++++++++++--- 3 files changed, 206 insertions(+), 43 deletions(-) diff --git a/Plugin/PaperReader/PaperReader.js b/Plugin/PaperReader/PaperReader.js index 6335fa21..e6c9bb51 100644 --- a/Plugin/PaperReader/PaperReader.js +++ b/Plugin/PaperReader/PaperReader.js @@ -39,7 +39,7 @@ async function writeJson(filePath, obj) { // ─── Command Handlers ─── -async function handleIngestPDF({ filePath, paperId }) { +async function handleIngestPDF({ filePath, paperId, forceReparse }) { if (!filePath || typeof filePath !== 'string') { throw new Error('IngestPDF requires filePath'); } @@ -54,6 +54,26 @@ async function handleIngestPDF({ filePath, paperId }) { : `paper-${sha1(abs).slice(0, 10)}`; const wsDir = getPaperWorkspace(resolvedPaperId); + const manifestPath = path.join(wsDir, 'chunks', 'manifest.json'); + const metaPath = path.join(wsDir, 'meta.json'); + + // ── Cache check: if manifest + meta already exist, skip re-parsing ── + if (!forceReparse && fsSync.existsSync(manifestPath) && fsSync.existsSync(metaPath)) { + const existingMeta = JSON.parse(await fs.readFile(metaPath, 'utf-8')); + const existingManifest = JSON.parse(await fs.readFile(manifestPath, 'utf-8')); + process.stderr.write(`[PaperReader][Ingest] cache hit: paperId=${resolvedPaperId}, chunkCount=${existingManifest.chunkCount}, engine=${existingMeta.engine}\n`); + return { + paperId: resolvedPaperId, + workspace: wsDir, + pageCount: existingMeta.pageCount, + chunkCount: existingManifest.chunkCount, + engine: existingMeta.engine, + cached: true + }; + } + + process.stderr.write(`[PaperReader][Ingest] no cache, starting full parse: paperId=${resolvedPaperId}\n`); + await fs.mkdir(wsDir, { recursive: true }); // L0: 解析 PDF → Markdown + Figures @@ -68,7 +88,7 @@ async function handleIngestPDF({ filePath, paperId }) { textLength: (parsed.markdown || '').length, engine: parsed.engine }; - await writeJson(path.join(wsDir, 'meta.json'), meta); + await writeJson(metaPath, meta); // Save full markdown await fs.writeFile(path.join(wsDir, 'full_text.md'), parsed.markdown || '', 'utf-8'); @@ -105,7 +125,7 @@ async function handleIngestPDF({ filePath, paperId }) { tokenCount: c.tokenCount })) }; - await writeJson(path.join(chunksDir, 'manifest.json'), manifest); + await writeJson(manifestPath, manifest); // Create reading_notes dir await fs.mkdir(path.join(wsDir, 'reading_notes'), { recursive: true }); @@ -115,7 +135,8 @@ async function handleIngestPDF({ filePath, paperId }) { workspace: wsDir, pageCount: meta.pageCount, chunkCount: chunks.length, - engine: parsed.engine + engine: parsed.engine, + cached: false }; } @@ -125,9 +146,13 @@ async function handleReadSkeleton({ paperId, focus }) { return { paperId, globalMapPath: result.globalMapPath, content: result.globalMapContent }; } -async function handleReadDeep({ paperId, goal }) { +async function handleReadDeep({ paperId, goal, maxChunks, batchSize, forceReread }) { if (!paperId) throw new Error('ReadDeep requires paperId'); - const result = await readDeep(paperId, { goal }); + const opts = { goal }; + if (maxChunks) opts.maxChunks = maxChunks; + if (batchSize) opts.batchSize = batchSize; + if (forceReread) opts.forceReread = true; + const result = await readDeep(paperId, opts); // Read the Round_1_Summary.md to return its content const summaryContent = fsSync.existsSync(result.roundPath) ? 
(await fs.readFile(result.roundPath, 'utf-8')) @@ -149,21 +174,27 @@ async function main() { const request = JSON.parse(inputData || '{}'); const command = request.command; + process.stderr.write(`[PaperReader][Main] request received: command=${command || 'undefined'}, paperId=${request.paperId || 'n/a'}\n`); + try { if (!command) throw new Error('Missing command'); let result; switch (command) { case 'IngestPDF': - result = await handleIngestPDF({ filePath: request.filePath, paperId: request.paperId }); + process.stderr.write('[PaperReader][Main] route hit: IngestPDF\n'); + result = await handleIngestPDF({ filePath: request.filePath, paperId: request.paperId, forceReparse: request.forceReparse }); break; case 'ReadSkeleton': + process.stderr.write('[PaperReader][Main] route hit: ReadSkeleton\n'); result = await handleReadSkeleton({ paperId: request.paperId, focus: request.focus }); break; case 'ReadDeep': - result = await handleReadDeep({ paperId: request.paperId, goal: request.goal }); + process.stderr.write('[PaperReader][Main] route hit: ReadDeep\n'); + result = await handleReadDeep({ paperId: request.paperId, goal: request.goal, maxChunks: request.maxChunks, batchSize: request.batchSize, forceReread: request.forceReread }); break; case 'Query': + process.stderr.write('[PaperReader][Main] route hit: Query\n'); result = await handleQuery({ paperId: request.paperId, question: request.question }); break; default: @@ -172,6 +203,7 @@ async function main() { sendResponse({ status: 'success', result }); } catch (err) { + process.stderr.write(`[PaperReader][Main] request failed: command=${command || 'undefined'}, error=${err?.message || String(err)}\n`); sendResponse({ status: 'error', error: err?.message || String(err) }); } } diff --git a/Plugin/PaperReader/lib/deep-reader.js b/Plugin/PaperReader/lib/deep-reader.js index ebd3fbde..7b0c2bb5 100644 --- a/Plugin/PaperReader/lib/deep-reader.js +++ b/Plugin/PaperReader/lib/deep-reader.js @@ -56,7 +56,7 @@ async function summarizeChunk(chunkText, { goal, globalMap, rollingContext, chun const result = await callLLMJson([ { role: 'system', content: system }, { role: 'user', content: userParts.join('\n\n') } - ], { temperature: 0.1 }); + ], { temperature: 0.1, traceTag: `DeepReader:chunk_${chunkIndex}` }); // Normalize result return { @@ -79,6 +79,18 @@ async function readDeep(paperId, options = {}) { const wsDir = path.join(WORKSPACE_ROOT, paperId); const chunksDir = path.join(wsDir, 'chunks'); const manifestPath = path.join(chunksDir, 'manifest.json'); + const notesDir = path.join(wsDir, 'reading_notes'); + const summariesPath = path.join(notesDir, 'Chunk_Summaries.json'); + const roundPath = path.join(notesDir, 'Round_1_Summary.md'); + + process.stderr.write(`[PaperReader][DeepReader] start: paperId=${paperId}, goal=${options.goal || '(default)'}\n`); + + // ── Cache check: if Round_1_Summary.md already exists, return directly ── + if (!options.forceReread && fsSync.existsSync(roundPath) && fsSync.existsSync(summariesPath)) { + const existingSummaries = JSON.parse(await fs.readFile(summariesPath, 'utf-8')); + process.stderr.write(`[PaperReader][DeepReader] cache hit: Round_1_Summary.md exists (${existingSummaries.count} chunk summaries). 
Returning cached result.\n`); + return { paperId, summariesPath, roundPath, cached: true }; + } if (!fsSync.existsSync(manifestPath)) { throw new Error(`chunks/manifest.json not found: ${manifestPath}`); @@ -98,16 +110,50 @@ async function readDeep(paperId, options = {}) { const goal = options.goal || ''; const limited = chunks.slice(0, maxChunks); - const summaries = []; + let summaries = []; let rollingContext = ''; - // Sequential processing with Rolling Context - // Process in small batches but maintain rolling context between batches + // ── Incremental resume: load existing chunk summaries if available ── + const existingSummariesMap = new Map(); + if (!options.forceReread && fsSync.existsSync(summariesPath)) { + try { + const existing = JSON.parse(await fs.readFile(summariesPath, 'utf-8')); + if (existing.summaries && Array.isArray(existing.summaries)) { + for (const s of existing.summaries) { + existingSummariesMap.set(s.chunkIndex, s); + } + process.stderr.write(`[PaperReader][DeepReader] found ${existingSummariesMap.size} cached chunk summaries, will skip those\n`); + } + } catch { /* ignore corrupt file */ } + } + + process.stderr.write(`[PaperReader][DeepReader] config: totalChunks=${chunks.length}, processing=${limited.length}, batchSize=${batchSize}, chunkDelay=${CHUNK_DELAY_MS}ms\n`); + + // Concurrent batch processing with Rolling Context + // Each batch shares the same rolling context snapshot, chunks within a batch run in parallel. + // After a batch completes, results are merged in order to update rolling context before next batch. for (let i = 0; i < limited.length; i += batchSize) { const batch = limited.slice(i, i + batchSize); + const batchNum = Math.floor(i / batchSize) + 1; + const totalBatches = Math.ceil(limited.length / batchSize); + process.stderr.write(`[PaperReader][DeepReader] batch ${batchNum}/${totalBatches} start (chunks ${i}-${Math.min(i + batchSize, limited.length) - 1}, concurrency=${batch.length})\n`); + + // Delay between batches to avoid rate limiting (skip first batch) + if (i > 0) { + await new Promise(r => setTimeout(r, CHUNK_DELAY_MS)); + } + + // Snapshot rolling context for this batch — all chunks in the batch see the same context + const batchRollingContext = rollingContext; + + // Launch all chunks in this batch concurrently (skip cached ones) + const batchPromises = batch.map(async (chunk) => { + // Check incremental cache + if (existingSummariesMap.has(chunk.index)) { + process.stderr.write(`[PaperReader][DeepReader] chunk ${chunk.index}/${limited.length - 1} (section: ${chunk.section || 'unknown'}) CACHED, skipping LLM\n`); + return existingSummariesMap.get(chunk.index); + } - // Within a batch, process sequentially to maintain rolling context - for (const chunk of batch) { // Read chunk content const chunkPath = path.join(chunksDir, `chunk_${chunk.index}.md`); let chunkText; @@ -117,44 +163,51 @@ async function readDeep(paperId, options = {}) { chunkText = chunk.text || ''; } - // Delay between LLM calls to avoid 429 rate limiting (skip first chunk) - if (summaries.length > 0) { - await new Promise(r => setTimeout(r, CHUNK_DELAY_MS)); - } + process.stderr.write(`[PaperReader][DeepReader] chunk ${chunk.index}/${limited.length - 1} (section: ${chunk.section || 'unknown'}) summarizing...\n`); const summary = await summarizeChunk(chunkText, { goal, globalMap, - rollingContext, + rollingContext: batchRollingContext, chunkIndex: chunk.index, section: chunk.section || 'unknown' }); - summaries.push({ + return { chunkIndex: chunk.index, section: 
chunk.section, ...summary - }); + }; + }); - // Update Rolling Context - const newFacts = summary.key_facts.join('; '); + // Wait for all chunks in this batch to complete + const batchResults = await Promise.all(batchPromises); + + // Merge results in order + for (const result of batchResults) { + summaries.push(result); + process.stderr.write(`[PaperReader][DeepReader] chunk ${result.chunkIndex} done (${summaries.length}/${limited.length} completed)\n`); + + // Update Rolling Context in order + const newFacts = result.key_facts.join('; '); if (newFacts) { - rollingContext += `\n[Chunk ${chunk.index} - ${chunk.section}]: ${newFacts}`; + rollingContext += `\n[Chunk ${result.chunkIndex} - ${result.section}]: ${newFacts}`; } + } - // Compress if exceeding limit - if (countTokens(rollingContext) > ROLLING_CONTEXT_MAX_TOKENS) { - rollingContext = await compressContext(rollingContext); - } + // Compress rolling context if exceeding limit (once per batch) + if (countTokens(rollingContext) > ROLLING_CONTEXT_MAX_TOKENS) { + process.stderr.write(`[PaperReader][DeepReader] rolling context exceeds ${ROLLING_CONTEXT_MAX_TOKENS} tokens, compressing...\n`); + rollingContext = await compressContext(rollingContext); } } // Save chunk summaries - const notesDir = path.join(wsDir, 'reading_notes'); await fs.mkdir(notesDir, { recursive: true }); - const summariesPath = path.join(notesDir, 'Chunk_Summaries.json'); await fs.writeFile(summariesPath, JSON.stringify({ count: summaries.length, summaries }, null, 2), 'utf-8'); + process.stderr.write(`[PaperReader][DeepReader] all ${summaries.length} chunks summarized, starting synthesis...\n`); + // Synthesis: merge all summaries into Round_1_Summary.md const system = [ '你是一个"长文档合并器",适用于各类文档。', @@ -172,11 +225,12 @@ async function readDeep(paperId, options = {}) { const merged = await callLLM([ { role: 'system', content: system }, { role: 'user', content: user } - ], { temperature: 0.2 }); + ], { temperature: 0.2, traceTag: 'DeepReader:synthesis' }); - const roundPath = path.join(notesDir, 'Round_1_Summary.md'); await fs.writeFile(roundPath, merged || '', 'utf-8'); + process.stderr.write(`[PaperReader][DeepReader] complete: summariesPath=${summariesPath}, roundPath=${roundPath}\n`); + return { paperId, summariesPath, roundPath }; } diff --git a/Plugin/PaperReader/lib/llm.js b/Plugin/PaperReader/lib/llm.js index e253ed5b..3bb0b745 100644 --- a/Plugin/PaperReader/lib/llm.js +++ b/Plugin/PaperReader/lib/llm.js @@ -1,6 +1,6 @@ /** * LLM 调用封装 (T4) - * + * * 从 PaperReader.js 抽出,统一管理模型调用。 */ @@ -10,28 +10,94 @@ const path = require('path'); require('dotenv').config({ path: path.join(__dirname, '..', 'config.env') }); require('dotenv').config({ path: path.join(__dirname, '..', '..', '..', 'config.env') }); -const API_KEY = process.env.API_Key; -const API_URL = process.env.API_URL; +const API_KEY = process.env.PaperReaderApiKey || process.env.Key || process.env.API_Key; +const RAW_API_URL = process.env.PaperReaderApiUrl || process.env.API_URL; +const VCP_PORT = process.env.PORT || '6005'; const MODEL = process.env.PaperReaderModel; const MAX_OUTPUT_TOKENS = parseInt(process.env.PaperReaderMaxOutputTokens || '12000', 10); +function resolveApiUrl() { + let url = RAW_API_URL; + if (!url) return null; + + // If API_URL is just a base like http://127.0.0.1:3000, auto-fix to VCP port + path + // VCP serves its chat completions API on PORT (default 6005), not the admin panel port + if (url.match(/^https?:\/\/(?:127\.0\.0\.1|localhost)(?::\d+)?$/)) { + const base = 
url.replace(/:\d+$/, ''); + url = `${base}:${VCP_PORT}/v1/chat/completions`; + } + + // Append /v1/chat/completions if URL doesn't already end with a path + if (!url.includes('/v1/') && !url.includes('/chat/')) { + url = url.replace(/\/$/, '') + '/v1/chat/completions'; + } + + return url; +} + +const API_URL = resolveApiUrl(); + function ensureConfig() { if (!API_KEY || !API_URL) { - throw new Error('Missing API config: API_Key/API_URL are required (from repo root config.env).'); + throw new Error( + `Missing API config: API_Key=${API_KEY ? 'set' : 'MISSING'}, API_URL=${API_URL || 'MISSING'} (raw=${RAW_API_URL || 'MISSING'}). ` + + 'Check repo root config.env and Plugin/PaperReader/config.env.' + ); } if (!MODEL) { throw new Error('Missing PaperReaderModel in config.env'); } } +function classifyLlmError(err) { + const status = err?.response?.status; + const code = err?.code; + + if (status === 429) { + return { + type: 'rate_limit', + message: 'LLM API 触发速率限制(429)。建议降低并发/增大 chunk 间隔后重试。' + }; + } + if (status === 401 || status === 403) { + return { + type: 'auth', + message: 'LLM API 鉴权失败(401/403)。请检查 API_Key 与权限。' + }; + } + if (code === 'ECONNABORTED') { + return { + type: 'timeout', + message: 'LLM API 请求超时(ECONNABORTED)。可提高超时或降低单次输入体积。' + }; + } + if (status >= 500 && status <= 599) { + return { + type: 'upstream_5xx', + message: `LLM API 上游服务错误(${status})。建议稍后重试。` + }; + } + if (code === 'ENOTFOUND' || code === 'ECONNREFUSED' || code === 'EAI_AGAIN') { + return { + type: 'network', + message: `LLM API 网络异常(${code})。请检查 API_URL 或网络连通性。` + }; + } + + return { + type: 'unknown', + message: `LLM API 未分类错误:${err?.message || 'unknown error'}` + }; +} + /** * 调用 LLM (OpenAI-compatible API) - * + * * @param {Array<{role: string, content: string}>} messages - * @param {object} options - { max_tokens, temperature } + * @param {object} options - { max_tokens, temperature, traceTag } * @returns {Promise} 模型输出文本 */ -async function callLLM(messages, { max_tokens = MAX_OUTPUT_TOKENS, temperature = 0.2 } = {}) { +async function callLLM(messages, { max_tokens = MAX_OUTPUT_TOKENS, temperature = 0.2, traceTag = 'callLLM' } = {}) { ensureConfig(); const payload = { @@ -45,34 +111,45 @@ async function callLLM(messages, { max_tokens = MAX_OUTPUT_TOKENS, temperature = const maxRetries = 5; for (let attempt = 0; attempt < maxRetries; attempt++) { try { + process.stderr.write(`[PaperReader][LLM][${traceTag}] request start: model=${MODEL}, attempt=${attempt + 1}/${maxRetries}, max_tokens=${max_tokens}\n`); const resp = await axios.post(API_URL, payload, { headers: { Authorization: `Bearer ${API_KEY}`, 'Content-Type': 'application/json' }, timeout: 180000 }); + process.stderr.write(`[PaperReader][LLM][${traceTag}] request success: attempt=${attempt + 1}/${maxRetries}\n`); return resp?.data?.choices?.[0]?.message?.content || ''; } catch (err) { const status = err?.response?.status; if (status === 429 && attempt < maxRetries - 1) { // Exponential backoff: 3s, 6s, 12s, 24s const delay = 3000 * Math.pow(2, attempt); - process.stderr.write(`[PaperReader] 429 rate limit, retrying in ${delay / 1000}s (attempt ${attempt + 1}/${maxRetries})\n`); + process.stderr.write(`[PaperReader][LLM][${traceTag}] 429 rate limit, retrying in ${delay / 1000}s (attempt ${attempt + 1}/${maxRetries})\n`); await new Promise(r => setTimeout(r, delay)); continue; } - throw err; + + const classified = classifyLlmError(err); + process.stderr.write( + `[PaperReader][LLM][${traceTag}] request failed: type=${classified.type}, status=${status || 
'n/a'}, code=${err?.code || 'n/a'}, message=${err?.message || 'n/a'}\n` + ); + throw new Error(`${classified.message} [status=${status || 'n/a'} code=${err?.code || 'n/a'}]`); } } } /** * 调用 LLM 并解析 JSON 响应 - * + * * @param {Array} messages * @param {object} options * @returns {Promise} 解析后的 JSON 对象 */ async function callLLMJson(messages, options = {}) { - const raw = await callLLM(messages, { ...options, temperature: options.temperature ?? 0.1 }); + const raw = await callLLM(messages, { + ...options, + temperature: options.temperature ?? 0.1, + traceTag: options.traceTag || 'callLLMJson' + }); try { // 尝试从 markdown 代码块中提取 JSON const jsonMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/); From f8cda116bccd5abd3ea8d107bff036de82413c57 Mon Sep 17 00:00:00 2001 From: rongfeng Date: Sun, 8 Feb 2026 20:56:04 +0800 Subject: [PATCH 6/6] =?UTF-8?q?feat(PaperReader):=20v0.4=20=E2=80=94=20?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E8=87=AA=E9=80=82=E5=BA=94=E9=98=85=E8=AF=BB?= =?UTF-8?q?=E5=BC=95=E6=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 Read 命令: Survey→Triage→DeepDive/Skim→Audit→Synthesize - 新增 Triage 分诊模块 (lib/triage.js): 注意力分配 + 拓扑排序 - 新增 Skim 模式 (lib/skim-reader.js): 轻量扫读 + upgrade 检测 - 新增 Auditor 审核员 (lib/auditor.js): 去污染独立审核 + PatchContext - 新增 ReadingState 持久化 (lib/reading-state.js) - Bug修复: Promise.allSettled 容错, 增量缓存 readMode 校验 - Bug修复: expandToChunkPlan skip 节点统计, O(n) 环检测 - 并发调优: BatchSize 2→5, MaxConcurrentLLM 3→5, ChunkDelay 1500→800ms - 更新 README, config.env.example, plugin-manifest.json --- Plugin/PaperReader/config.env.example | 12 ++- Plugin/PaperReader/lib/reading-state.js | 137 ++++++++++++++++++++++++ Plugin/PaperReader/lib/skim-reader.js | 52 +++++++++ Plugin/PaperReader/plugin-manifest.json | 28 +++-- 4 files changed, 219 insertions(+), 10 deletions(-) create mode 100644 Plugin/PaperReader/lib/reading-state.js create mode 100644 Plugin/PaperReader/lib/skim-reader.js diff --git a/Plugin/PaperReader/config.env.example b/Plugin/PaperReader/config.env.example index 92ffb42f..600481df 100644 --- a/Plugin/PaperReader/config.env.example +++ b/Plugin/PaperReader/config.env.example @@ -23,7 +23,15 @@ PaperReaderOverlap=0.15 PaperReaderModel=gemini-2.5-flash-search # 单次模型输出 token 上限 PaperReaderMaxOutputTokens=12000 -# 分批并发组大小(每组处理的 chunk 数) -PaperReaderBatchSize=4 +# 分批并发组大小(每组处理的 chunk 数,建议 ≤ MaxConcurrentLLM) +# ⚠️ 质量取舍:同批内的 deep chunk 共享同一份 Rolling Context 快照。 +# BatchSize=1(串行):上下文递进最强,chunk N 能看到 1..N-1 的所有发现 +# BatchSize=5(推荐):速度与质量的甜蜜点 +# BatchSize=10+:速度最快,但同批 chunk 无法互相感知(skim 不受影响) +# 极高精度需求(法律/财务逐条审计)建议 ≤3 +PaperReaderBatchSize=5 +# 进程级 LLM 最大并发请求数(防止 429 风暴,建议 3-8) +# 真正的并发控制由此 semaphore 管理,BatchSize 只控制批内共享上下文的范围 +PaperReaderMaxConcurrentLLM=5 # deep 阅读最多处理多少个 chunk(防止成本失控) PaperReaderMaxChunks=120 diff --git a/Plugin/PaperReader/lib/reading-state.js b/Plugin/PaperReader/lib/reading-state.js new file mode 100644 index 00000000..1d170cc6 --- /dev/null +++ b/Plugin/PaperReader/lib/reading-state.js @@ -0,0 +1,137 @@ +/** + * ReadingState 持久化管理 (v0.4) + * + * 管理 reading_state.json 的读写,支持: + * - 中断恢复 + * - 多轮阅读 + * - 跨会话接力 + */ + +const fs = require('fs').promises; +const fsSync = require('fs'); +const path = require('path'); + +const WORKSPACE_ROOT = path.join(__dirname, '..', 'workspace'); + +/** + * 创建空的 ReadingState + */ +function createEmptyState(docId, goal, mode) { + return { + docId, + goal: goal || '', + mode: mode || 'auto', + currentPhase: 'survey', + round: 1, + rollingContext: '', + readLog: [], + chunkSummaries: 
[], + auditReport: null, + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString() + }; +} + +/** + * 获取 reading_state.json 路径 + */ +function getStatePath(docId) { + return path.join(WORKSPACE_ROOT, docId, 'reading_notes', 'reading_state.json'); +} + +/** + * 加载 ReadingState(不存在则返回 null) + */ +async function loadState(docId) { + const statePath = getStatePath(docId); + if (!fsSync.existsSync(statePath)) return null; + try { + const raw = await fs.readFile(statePath, 'utf-8'); + return JSON.parse(raw); + } catch { + return null; + } +} + +/** + * 保存 ReadingState + */ +async function saveState(docId, state) { + const statePath = getStatePath(docId); + const dir = path.dirname(statePath); + await fs.mkdir(dir, { recursive: true }); + state.updatedAt = new Date().toISOString(); + await fs.writeFile(statePath, JSON.stringify(state, null, 2), 'utf-8'); +} + +/** + * 加载或创建 ReadingState + */ +async function loadOrCreateState(docId, goal, mode) { + const existing = await loadState(docId); + if (existing) { + // 如果 goal 不同,创建新的 round + if (goal && existing.goal !== goal) { + existing.round = (existing.round || 1) + 1; + existing.goal = goal; + existing.currentPhase = 'survey'; + existing.auditReport = null; + process.stderr.write(`[PaperReader][State] new round ${existing.round} with different goal\n`); + } + return existing; + } + return createEmptyState(docId, goal, mode); +} + +/** + * 记录一个 chunk 的阅读结果 + */ +function addChunkRead(state, { chunkIndex, section, readMode, nodeId }) { + state.readLog.push({ + chunkIndex, + section: section || 'unknown', + readMode, + nodeId: nodeId || null, + readAt: new Date().toISOString(), + round: state.round + }); +} + +/** + * 添加 chunk 摘要 + */ +function addChunkSummary(state, summary) { + // 去重:同 chunkIndex 只保留最新 + state.chunkSummaries = state.chunkSummaries.filter( + s => s.chunkIndex !== summary.chunkIndex + ); + state.chunkSummaries.push(summary); +} + +/** + * 更新阶段 + */ +function setPhase(state, phase) { + state.currentPhase = phase; +} + +/** + * 获取已读 chunk 索引集合(指定 round 或全部) + */ +function getReadChunkIndices(state, round) { + const log = round + ? 
state.readLog.filter(r => r.round === round) + : state.readLog; + return new Set(log.map(r => r.chunkIndex)); +} + +module.exports = { + createEmptyState, + loadState, + saveState, + loadOrCreateState, + addChunkRead, + addChunkSummary, + setPhase, + getReadChunkIndices +}; diff --git a/Plugin/PaperReader/lib/skim-reader.js b/Plugin/PaperReader/lib/skim-reader.js new file mode 100644 index 00000000..1f036910 --- /dev/null +++ b/Plugin/PaperReader/lib/skim-reader.js @@ -0,0 +1,52 @@ +/** + * Skim Reader 模块 (v0.4) + * + * 轻量摘要:用简化 prompt 处理 skim 标记的 chunk。 + * 核心约束:Skim 结果不写入 Rolling Context(不污染精读上下文)。 + * 支持 upgrade 检测:如果发现高密度信息,自动提升为 deep。 + */ + +const { callLLMJson } = require('./llm'); + +/** + * 对单个 chunk 执行 Skim 摘要 + * + * @param {string} chunkText - chunk 原文 + * @param {object} options - { goal, chunkIndex, section } + * @returns {Promise<{summary: string, upgrade: boolean, reason: string}>} + */ +async function skimChunk(chunkText, { goal, chunkIndex, section }) { + const system = [ + '你是一个快速扫读器。用一句话概括这个章节的核心内容。', + '如果发现与阅读目标高度相关的意外重要内容,标记 upgrade: true。', + '', + '输出 JSON(纯 JSON,不要代码块):', + '{"summary": string, "upgrade": boolean, "reason": string}', + '', + 'upgrade 规则:', + '- true:该 chunk 包含与阅读目标直接相关的关键数据/方法/结论,值得精读', + '- false:该 chunk 是背景/综述/已知信息,扫读即可', + 'reason:解释为什么 upgrade 或不 upgrade(一句话)' + ].join('\n'); + + const user = [ + `阅读目标:${goal || '全面理解文档核心内容'}`, + `当前位置:第 ${chunkIndex} 块,章节「${section}」`, + '', + `【chunk 内容】`, + chunkText + ].join('\n'); + + const result = await callLLMJson([ + { role: 'system', content: system }, + { role: 'user', content: user } + ], { temperature: 0.1, max_tokens: 500, traceTag: `Skim:chunk_${chunkIndex}` }); + + return { + summary: result.summary || result.raw_response || '', + upgrade: result.upgrade === true, + reason: result.reason || '' + }; +} + +module.exports = { skimChunk }; diff --git a/Plugin/PaperReader/plugin-manifest.json b/Plugin/PaperReader/plugin-manifest.json index 580fa8e6..25fccc2d 100644 --- a/Plugin/PaperReader/plugin-manifest.json +++ b/Plugin/PaperReader/plugin-manifest.json @@ -1,9 +1,9 @@ { "manifestVersion": "1.0.0", "name": "PaperReader", - "version": "0.2.0", + "version": "0.4.0", "displayName": "超文本递归阅读器", - "description": "将超长 PDF/文档转为可递归阅读的分块工件(Skeleton/Deep/Query)。适用于学术论文、技术报告、书籍章节、法律文书等各类长文档。v0.2: MinerU 云端高保真解析(公式/表格/图片/多栏/扫描OCR)+ 章节感知切分 + Rolling Context 深度阅读。MinerU 不可用时自动降级到 pdf-parse。", + "description": "统一自适应阅读引擎:将超长 PDF/文档转为目标驱动的多分辨率阅读流程。v0.4: 统一 Read 命令(Survey→Triage→DeepDive/Skim→Audit→Synthesize)、Triage 分诊注意力分配、Skim 轻量扫读、Auditor 去偏见审核、ReadingState 持久化。MinerU 云端高保真解析,不可用时自动降级到 pdf-parse。", "author": "VCP", "pluginType": "synchronous", "entryPoint": { @@ -12,7 +12,7 @@ }, "communication": { "protocol": "stdio", - "timeout": 600000 + "timeout": 1800000 }, "configSchema": { "MINERU_API_TOKEN": { @@ -45,11 +45,19 @@ }, "PaperReaderBatchSize": { "type": "integer", - "description": "Deep 阅读分批处理的 chunk 数,默认 4。" + "description": "Deep 阅读分批处理的 chunk 数,默认 5。同批内共享 Rolling Context 快照,越大速度越快但上下文递进越弱。" + }, + "PaperReaderMaxConcurrentLLM": { + "type": "integer", + "description": "进程级 LLM 最大并发请求数(semaphore),默认 5。真正的并发控制。" }, "PaperReaderMaxChunks": { "type": "integer", - "description": "单次 deep 阅读最多处理的 chunk 数(防止成本失控),默认 120。" + "description": "单次阅读最多处理的 chunk 数(防止成本失控),默认 120。" + }, + "PaperReaderMaxAuditChunks": { + "type": "integer", + "description": "Auditor 审核抽样最大 chunk 数,默认 8。" } }, "capabilities": { @@ -58,17 +66,21 @@ "commandIdentifier": "IngestPDF", "description": "解析 PDF 
并生成可递归阅读的分块工件。支持学术论文、技术报告、书籍、法律文书等各类 PDF。使用 MinerU 云端 API 高保真解析(保留公式/表格/图片/多栏排版),失败自动降级到 pdf-parse。输出章节感知的 chunk 文件。\n参数:\n- command: 固定为 IngestPDF\n- filePath (字符串, 必需): PDF 绝对路径\n- paperId (字符串, 可选): 自定义 ID(不传则自动生成)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」IngestPDF「末」,\nfilePath:「始」D:\\\\books\\\\paper.pdf「末」\n<<<[END_TOOL_REQUEST]>>>" }, + { + "commandIdentifier": "Read", + "description": "v0.4 统一自适应阅读命令。自动执行完整流程:Survey(骨架提取)→ Triage(分诊注意力分配)→ DeepDive/Skim(精读/扫读)→ Audit(去偏见审核)→ Synthesize(合成报告)。这是推荐的阅读方式,自动决定哪些章节精读、哪些扫读、哪些跳过。注意:处理大文档(100+ chunks)可能需要数分钟。\n参数:\n- command: 固定为 Read\n- paperId (字符串, 必需): 文档 ID\n- goal (字符串, 可选): 阅读目标(影响 Triage 分诊策略)\n- forceReread (布尔值, 可选): 强制重新阅读(忽略缓存)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」Read「末」,\npaperId:「始」xray-20260208-xxx「末」,\ngoal:「始」理解核心方法和实验设计「末」\n<<<[END_TOOL_REQUEST]>>>" + }, { "commandIdentifier": "ReadSkeleton", - "description": "基于已导入的文档工件生成骨架地图(Global Map)。从 Markdown 结构提取目录树、关键章节和图注,生成带阅读优先级标签的全局地图。适用于任何已 Ingest 的文档。\n参数:\n- command: 固定为 ReadSkeleton\n- paperId (字符串, 必需)\n- focus (字符串, 可选): 本次阅读关注点\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadSkeleton「末」,\npaperId:「始」paper-xxx「末」,\nfocus:「始」方法学与实验设计「末」\n<<<[END_TOOL_REQUEST]>>>" + "description": "基于已导入的文档工件生成骨架地图(Global Map)+ 层级树索引。Read 命令会自动调用此步骤,通常不需要单独使用。\n参数:\n- command: 固定为 ReadSkeleton\n- paperId (字符串, 必需)\n- focus (字符串, 可选): 本次阅读关注点\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadSkeleton「末」,\npaperId:「始」xray-xxx「末」,\nfocus:「始」方法学与实验设计「末」\n<<<[END_TOOL_REQUEST]>>>" }, { "commandIdentifier": "ReadDeep", - "description": "带 Rolling Context 的深度阅读:逐 chunk 摘要时携带前序累积的关键事实,保持 chunk 间连贯性。超出上限自动压缩。最终合并为 Round-1 深度笔记。\n参数:\n- command: 固定为 ReadDeep\n- paperId (字符串, 必需)\n- goal (字符串, 可选): 主任务目标(用于决定摘要粒度)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadDeep「末」,\npaperId:「始」paper-xxx「末」,\ngoal:「始」快速理解核心贡献与可复现实验步骤「末」\n<<<[END_TOOL_REQUEST]>>>" + "description": "全量 Rolling Context 深度阅读(无 Triage/Audit 的 v0.3 兼容模式)。对所有 chunk 无差别精读。推荐使用 Read 命令替代。\n参数:\n- command: 固定为 ReadDeep\n- paperId (字符串, 必需)\n- goal (字符串, 可选): 主任务目标\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」ReadDeep「末」,\npaperId:「始」xray-xxx「末」,\ngoal:「始」快速理解核心贡献「末」\n<<<[END_TOOL_REQUEST]>>>" }, { "commandIdentifier": "Query", - "description": "对已导入的文档做检索式问答(关键词匹配 + 章节权重挑选相关 chunk)。返回答案及引用来源。\n参数:\n- command: 固定为 Query\n- paperId (字符串, 必需)\n- question (字符串, 必需)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」Query「末」,\npaperId:「始」paper-xxx「末」,\nquestion:「始」这份文档的核心结论是什么?「末」\n<<<[END_TOOL_REQUEST]>>>" + "description": "对已导入的文档做检索式问答(树索引推理式检索,降级到关键词匹配)。返回答案、推理过程及引用来源。\n参数:\n- command: 固定为 Query\n- paperId (字符串, 必需)\n- question (字符串, 必需)\n调用格式:\n<<<[TOOL_REQUEST]>>>\ntool_name:「始」PaperReader「末」,\ncommand:「始」Query「末」,\npaperId:「始」xray-xxx「末」,\nquestion:「始」这份文档的核心结论是什么?「末」\n<<<[END_TOOL_REQUEST]>>>" } ] }
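
A minimal local-invocation sketch (appended notes, not part of any patch above) for exercising the synchronous stdio protocol described in plugin-manifest.json: a JSON request is piped to PaperReader.js on stdin and the JSON response is read from stdout. The file path and the choice of execFileSync are illustrative assumptions, not part of the plugin:

    // Sketch: drive one IngestPDF request through the plugin's stdin/stdout protocol.
    const { execFileSync } = require('child_process');

    const request = JSON.stringify({
      command: 'IngestPDF',
      filePath: '/absolute/path/to/document.pdf',  // illustrative path
      forceReparse: false
    });

    const stdout = execFileSync('node', ['Plugin/PaperReader/PaperReader.js'], {
      input: request,
      encoding: 'utf-8'
    });

    const response = JSON.parse(stdout);
    // On success: { status: 'success', result: { paperId, workspace, pageCount, chunkCount, engine, cached } }
    // On failure: { status: 'error', error: '...' }
    console.log(response);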