harumiWeb · harumiWeb · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026
diff --git a/docs/agents/FEATURE_SPEC.md b/docs/agents/FEATURE_SPEC.md
@@ -4,46 +4,6 @@
 
 ---
 
-## セル結合データのコンテキスト量圧縮
-
-- 現状の `merged_cells` がコンテキスト量を非常に多く持っているため、データ構造の見直しで圧縮する
-- `rows` と `merged_cells` でセル値を重複して持っているため、出力時に `rows` 側の結合セル値を落とす運用を検討する
-
-### 仕様（v1.1 予定）
-
-- `merged_cells` を **schema + items** 形式へ変更して冗長なキーを削減する
-- 結合セルの値は `merged_cells` に集約し、`rows` 側に保持するかはフラグで切替可能にする
-
-#### merged_cells の新フォーマット（例）
-
-```json
-{
-  "merged_cells": {
-    "schema": ["r1", "c1", "r2", "c2", "v"],
-    "items": [
-      [1, 0, 2, 1, "A1-B2 merged"],
-      [3, 4, 3, 6, "merged value"]
-    ]
-  }
-}
-```
-
-- `r1/c1/r2/c2` は従来同様の座標（row: 1-based, col: 0-based）
-- `v` は結合セルの代表値（セル値がない場合でも `" "` を出力する）
-
-#### rows 側の結合セル値の扱い
-
-- 新しいフラグ `include_merged_values_in_rows: bool` を導入
-- `True` の場合は互換モード（従来どおり `rows` に結合セル値を残す）
-- `False` の場合は `rows` から結合セル値を排除し、`merged_cells` のみで値を保持
-
-#### 互換性
-
-- デフォルトは `True` として破壊的変更を回避
-- 将来的にデフォルト切替の可能性があるため、出力仕様に明記する
-
----
-
 ## 今後のオプション検討メモ
 
 - 表検知スコアリングの閾値を CLI/環境変数で調整可能にする

diff --git a/docs/agents/TASKS.md b/docs/agents/TASKS.md
@@ -2,10 +2,7 @@
 
 未完了 [ ], 完了 [x]
 
-- [x] 仕様: `merged_cells` の新フォーマット（schema + items）をモデルと出力仕様に反映
-- [x] 仕様: `include_merged_values_in_rows` フラグ追加（デフォルト True）
-- [x] 実装: 既存の `merged_cells` 生成ロジックを新構造へ置換
-- [x] 実装: `rows` から結合セル値を排除する分岐を追加（フラグ制御）
-- [x] 実装: 結合セルの値がない場合は `" "` を出力
-- [ ] 更新: 既存の JSON 出力例・ドキュメントの整合性確認
-- [x] テスト: 結合セルが多いケースの JSON 量削減を確認
+- [x] 仕様確認: 画像出力は DPI を維持しつつ、メモリリーク/クラッシュ回避のためサブプロセス化で処理する方針を明記
+- [x] 実装方針: シートごとに PDF を分割 → サブプロセスで PDF ページを PNG へ変換 → 終了時にメモリを解放する設計（親は進捗/結果を集約）
+- [x] 実装方針: 子プロセスは `pypdfium2` をロードしてページごとにレンダリングし、書き込み済みパスを親に返す
+- [x] 実装方針: 例外時は子プロセスでエラーを返し、親が RenderError として集約して返す
diff --git a/docs/release-notes/v0.3.6.md b/docs/release-notes/v0.3.6.md
@@ -0,0 +1,17 @@
+# v0.3.6 Release Notes
+
+This release improves rendering robustness for image export and large Excel
+files, with better support for multi-page sheets and legacy .xls inputs.
+
+## Highlights
+
+- Sheet image export now renders all PDF pages per sheet, with `_pNN` suffixes
+  for page 2+ (fixes multi-print-range sheets outputting only the first image).
+- .xls rendering now uses Excel SaveAs to a temporary .xlsx before PDF export,
+  avoiding failures when outputting images from legacy files.
+- Image rendering can run in a subprocess to isolate memory usage and reduce
+  crashes on large workbooks (enabled by default).
+
+## Notes
+
+- Set `EXSTRUCT_RENDER_SUBPROCESS=0` (or `false`) to disable subprocess rendering and render in the main process instead.
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -27,6 +27,7 @@ nav:
   - CLI Guide: cli.md
   - Concept / Why ExStruct?: concept.md
   - Release Notes:
+      - v0.3.6: release-notes/v0.3.6.md
       - v0.3.5: release-notes/v0.3.5.md
       - v0.3.2: release-notes/v0.3.2.md
       - v0.3.1: release-notes/v0.3.1.md

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "exstruct"
-version = "0.3.5"
+version = "0.3.6"
 description = "Excel to structured JSON (tables, shapes, charts) for LLM/RAG pipelines"
 readme = "README.md"
 license = { file = "LICENSE" }

diff --git a/src/exstruct/render/__init__.py b/src/exstruct/render/__init__.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import logging
+import multiprocessing as mp
+import os
 from pathlib import Path
 import shutil
 import tempfile
@@ -35,14 +37,15 @@ def export_pdf(excel_path: str | Path, output_pdf: str | Path) -> list[str]:
         temp_dir = Path(td)
         temp_xlsx = temp_dir / "book.xlsx"
         temp_pdf = temp_dir / "book.pdf"
-        shutil.copy(normalized_excel_path, temp_xlsx)
 
         app: xw.App | None = None
         wb: xw.Book | None = None
         try:
             app = _require_excel_app()
-            wb = app.books.open(str(temp_xlsx))
+            app.display_alerts = False
+            wb = app.books.open(str(normalized_excel_path))
             sheet_names = [s.name for s in wb.sheets]
+            wb.api.SaveAs(str(temp_xlsx))
             wb.api.ExportAsFixedFormat(0, str(temp_pdf))
             shutil.copy(temp_pdf, normalized_output_pdf)
         except RenderError:
@@ -77,28 +80,55 @@ def export_sheet_images(
     excel_path: str | Path, output_dir: str | Path, dpi: int = 144
 ) -> list[Path]:
     """Export each sheet as PNG (via PDF then pypdfium2 rasterization) and return paths in sheet order."""
-    pdfium = cast(Any, _require_pdfium())
     normalized_excel_path = Path(excel_path)
     normalized_output_dir = Path(output_dir)
     normalized_output_dir.mkdir(parents=True, exist_ok=True)
+    use_subprocess = _use_render_subprocess()
+    if not use_subprocess:
+        pdfium = cast(Any, _require_pdfium())
+    else:
+        _require_pdfium()
 
     try:
         with tempfile.TemporaryDirectory() as td:
-            tmp_pdf = Path(td) / "book.pdf"
-            sheet_names = export_pdf(normalized_excel_path, tmp_pdf)
-
-            scale = dpi / 72.0
             written: list[Path] = []
-            with pdfium.PdfDocument(str(tmp_pdf)) as pdf:
-                for i, sheet_name in enumerate(sheet_names):
-                    page = pdf[i]
-                    bitmap = page.render(scale=scale)
-                    pil_image = bitmap.to_pil()
+            app: xw.App | None = None
+            wb: xw.Book | None = None
+            try:
+                app = _require_excel_app()
+                wb = app.books.open(str(normalized_excel_path))
+                for sheet_index, sheet in enumerate(wb.sheets):
+                    sheet_name = sheet.name
+                    sheet_pdf = Path(td) / f"sheet_{sheet_index + 1:02d}.pdf"
+                    sheet.api.ExportAsFixedFormat(0, str(sheet_pdf))
                     safe_name = _sanitize_sheet_filename(sheet_name)
-                    img_path = normalized_output_dir / f"{i + 1:02d}_{safe_name}.png"
-                    pil_image.save(img_path, format="PNG", dpi=(dpi, dpi))
-                    written.append(img_path)
-            return written
+                    if use_subprocess:
+                        written.extend(
+                            _render_pdf_pages_subprocess(
+                                sheet_pdf,
+                                normalized_output_dir,
+                                sheet_index,
+                                safe_name,
+                                dpi,
+                            )
+                        )
+                    else:
+                        written.extend(
+                            _render_pdf_pages_in_process(
+                                pdfium,
+                                sheet_pdf,
+                                normalized_output_dir,
+                                sheet_index,
+                                safe_name,
+                                dpi,
+                            )
+                        )
+                return written
+            finally:
+                if wb is not None:
+                    wb.close()
+                if app is not None:
+                    app.quit()
     except RenderError:
         raise
     except Exception as exc:
@@ -111,4 +141,99 @@ def _sanitize_sheet_filename(name: str) -> str:
     return "".join("_" if c in '\\/:*?"<>|' else c for c in name).strip() or "sheet"
 
 
+def _use_render_subprocess() -> bool:
+    """Return True when PDF->PNG rendering should run in a subprocess.
+
+    Reads the ``EXSTRUCT_RENDER_SUBPROCESS`` environment variable, defaulting
+    to ``"1"`` when it is unset. Only ``0`` or ``false`` (case-insensitive,
+    surrounding whitespace ignored) disable subprocess rendering; every other
+    value, including an empty string, keeps it enabled.
+    """
+    raw_value = os.getenv("EXSTRUCT_RENDER_SUBPROCESS", "1")
+    # Strip before comparing: a stray trailing space or newline (easy to pick
+    # up from a shell `set`/`export` line) must not silently re-enable the
+    # subprocess path.
+    return raw_value.strip().lower() not in {"0", "false"}
+
+
+def _render_pdf_pages_in_process(
+    pdfium: ModuleType,
+    pdf_path: Path,
+    output_dir: Path,
+    sheet_index: int,
+    safe_name: str,
+    dpi: int,
+) -> list[Path]:
+    """Rasterize every page of ``pdf_path`` to a PNG inside this process.
+
+    The first page is saved as ``NN_<safe_name>.png`` (``NN`` is the one-based
+    sheet number); each later page gets an extra ``_pMM`` suffix carrying its
+    one-based page number.
+
+    Args:
+        pdfium: Object exposing ``PdfDocument`` (the pypdfium2 module).
+        pdf_path: PDF file to rasterize.
+        output_dir: Directory that receives the PNG files.
+        sheet_index: Zero-based sheet index used for the filename prefix.
+        safe_name: Sheet name used in the filename.
+        dpi: Target resolution; also written into the PNG metadata.
+
+    Returns:
+        The written PNG paths in page order.
+    """
+    # Render scale is expressed relative to 72 DPI.
+    zoom = dpi / 72.0
+    file_prefix = f"{sheet_index + 1:02d}_{safe_name}"
+    outputs: list[Path] = []
+    with pdfium.PdfDocument(str(pdf_path)) as document:
+        page_total = len(document)
+        for page_number in range(1, page_total + 1):
+            rendered = document[page_number - 1].render(scale=zoom)
+            image = rendered.to_pil()
+            # Only pages after the first carry a page marker.
+            marker = "" if page_number == 1 else f"_p{page_number:02d}"
+            target = output_dir / f"{file_prefix}{marker}.png"
+            image.save(target, format="PNG", dpi=(dpi, dpi))
+            outputs.append(target)
+    return outputs
+
+
+def _render_pdf_pages_subprocess(
+    pdf_path: Path,
+    output_dir: Path,
+    sheet_index: int,
+    safe_name: str,
+    dpi: int,
+) -> list[Path]:
+    """Render PDF pages to PNGs in a subprocess for memory isolation.
+
+    Args:
+        pdf_path: PDF file to rasterize.
+        output_dir: Directory that receives the PNG files.
+        sheet_index: Zero-based sheet index forwarded to the worker.
+        safe_name: Sheet name forwarded to the worker for the filename.
+        dpi: Target resolution forwarded to the worker.
+
+    Returns:
+        The paths reported by the worker, in the order it reported them.
+
+    Raises:
+        RenderError: If the worker exits with a non-zero code, reports an
+            error, or never delivers a result.
+    """
+    from queue import Empty
+
+    ctx = mp.get_context("spawn")
+    result_queue: mp.Queue[dict[str, list[str] | str]] = ctx.Queue()
+    process = ctx.Process(
+        target=_render_pdf_pages_worker,
+        args=(pdf_path, output_dir, sheet_index, safe_name, dpi, result_queue),
+    )
+    process.start()
+    # Drain the queue *before* joining. A child that has put data on a
+    # multiprocessing queue does not exit until that data is flushed to the
+    # pipe, so joining first can deadlock once the payload outgrows the pipe
+    # buffer.
+    result: dict[str, list[str] | str] | None = None
+    while result is None and process.is_alive():
+        try:
+            result = result_queue.get(timeout=0.2)
+        except Empty:
+            continue
+        except Exception as exc:
+            result = {"error": f"subprocess did not return results ({exc})"}
+    process.join()
+    if result is None:
+        # The worker exited between polls (its result may still be in the
+        # pipe) or died without reporting; fall back to the bounded wait.
+        result = _get_subprocess_result(result_queue)
+    if process.exitcode != 0 or "error" in result:
+        message = result.get("error", "subprocess failed")
+        raise RenderError(f"Failed to render PDF pages: {message}")
+    paths = result.get("paths", [])
+    return [Path(path) for path in paths]
+
+
+def _get_subprocess_result(
+    queue: mp.Queue[dict[str, list[str] | str]],
+) -> dict[str, list[str] | str]:
+    """Read the worker's result dict from ``queue``, waiting at most 5 seconds.
+
+    Any failure while reading (including the timeout expiring) is converted
+    into an ``{"error": ...}`` dict instead of being raised.
+    """
+    try:
+        payload = queue.get(timeout=5)
+    except Exception as exc:
+        payload = {"error": f"subprocess did not return results ({exc})"}
+    return payload
+
+
+def _render_pdf_pages_worker(
+    pdf_path: Path,
+    output_dir: Path,
+    sheet_index: int,
+    safe_name: str,
+    dpi: int,
+    queue: mp.Queue[dict[str, list[str] | str]],
+) -> None:
+    """Worker process to render PDF pages into PNG files.
+
+    Reports through ``queue`` rather than by return value: ``{"paths": [...]}``
+    with the written PNG paths as strings on success, or ``{"error": message}``
+    when anything raises.
+
+    Args:
+        pdf_path: PDF file to rasterize.
+        output_dir: Directory that receives the PNG files (created if missing).
+        sheet_index: Zero-based sheet index used for the filename prefix.
+        safe_name: Sheet name used in the filename.
+        dpi: Target resolution.
+        queue: Queue on which the result dict is put.
+    """
+    try:
+        # Imported here so a missing pypdfium2 is reported through the queue
+        # like any other failure.
+        import pypdfium2 as pdfium
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # Delegate to the in-process renderer so both modes share one
+        # rendering loop and one file-naming scheme.
+        written = _render_pdf_pages_in_process(
+            pdfium, pdf_path, output_dir, sheet_index, safe_name, dpi
+        )
+        queue.put({"paths": [str(path) for path in written]})
+    except Exception as exc:
+        # Some exceptions stringify to ""; fall back to repr so the parent
+        # never receives an empty error message.
+        queue.put({"error": str(exc) or repr(exc)})
+
+
 __all__ = ["export_pdf", "export_sheet_images"]
diff --git a/tests/assets/multiple_print_ranges_4sheets.xlsx b/tests/assets/multiple_print_ranges_4sheets.xlsx
diff --git a/tests/assets/sample.xls b/tests/assets/sample.xls
diff --git a/tests/com/test_render_smoke.py b/tests/com/test_render_smoke.py
@@ -34,3 +34,36 @@ def test_render_smoke_pdf_and_png(tmp_path: Path) -> None:
     assert pdf_path.exists()
     assert images_dir.exists()
     assert any(images_dir.glob("*.png"))
+
+
+def test_render_multiple_print_ranges_images(tmp_path: Path) -> None:
+    """Export images for the multi-print-range workbook and expect four distinct sheet stems."""
+    assets_dir = Path(__file__).resolve().parents[1] / "assets"
+    workbook = assets_dir / "multiple_print_ranges_4sheets.xlsx"
+    json_path = tmp_path / "out.json"
+    process_excel(
+        workbook,
+        output_path=json_path,
+        out_fmt="json",
+        image=True,
+        dpi=72,
+        mode="standard",
+        pretty=True,
+    )
+    images_dir = json_path.parent / f"{json_path.stem}_images"
+    png_files = list(images_dir.glob("*.png"))
+    assert images_dir.exists()
+    # Collapse the _pNN page markers so each sheet counts once, however many
+    # pages it produced.
+    sheet_stems = {_strip_page_suffix(png.stem) for png in png_files}
+    assert len(sheet_stems) == 4
+
+
+def _strip_page_suffix(stem: str) -> str:
+    """Drop a trailing two-digit ``_pNN`` page marker from an image stem, if present."""
+    base, marker, digits = stem.rpartition("_p")
+    # An empty marker means "_p" never occurred; anything after the last "_p"
+    # that is not exactly two digits is part of the name, not a page marker.
+    has_page_marker = bool(marker) and len(digits) == 2 and digits.isdigit()
+    return base if has_page_marker else stem