chore: update READMEs for dspy example, update imports, etc. (#1366)

georgeh0 · web-flow · commit dd17870635d1 · 2025-12-04T11:39:20.000-08:00
diff --git a/README.md b/README.md
@@ -204,6 +204,7 @@ It defines an index flow like this:
 | [Patient intake form extraction](examples/patient_intake_extraction) | Use LLM to extract structured data from patient intake forms with different formats |
 | [HackerNews Trending Topics](examples/hn_trending_topics) | Extract trending topics from HackerNews threads and comments, using *CocoIndex Custom Source* and LLM |
 | [Patient Intake Form Extraction with BAML](examples/patient_intake_extraction_baml) | Extract structured data from patient intake forms using BAML |
+| [Patient Intake Form Extraction with DSPy](examples/patient_intake_extraction_dspy) | Extract structured data from patient intake forms using DSPy |
 
 More coming and stay tuned 👀!
 
diff --git a/examples/README.md b/examples/README.md
@@ -30,6 +30,7 @@ Check out our [examples documentation](https://cocoindex.io/docs/examples) for m
 
 - 🏥 [**patient_intake_extraction**](./patient_intake_extraction) - Extract structured data from patient intake forms (PDF, Docx) using LLM
 - 🏥 [**patient_intake_extraction_baml**](./patient_intake_extraction_baml) - Extract structured data from patient intake PDFs using BAML
+- 🏥 [**patient_intake_extraction_dspy**](./patient_intake_extraction_dspy) - Extract structured data from patient intake PDFs using DSPy
 - 📖 [**manuals_llm_extraction**](./manuals_llm_extraction) - Extract structured information from PDF manuals using Ollama
 - 📄 [**paper_metadata**](./paper_metadata) - Extract metadata (title, authors, abstract) from research papers in PDF
 - 📝 [**meeting_notes_graph**](./meeting_notes_graph) - Extract structured meeting info from Google Drive and build a knowledge graph
diff --git a/examples/patient_intake_extraction_dspy/main.py b/examples/patient_intake_extraction_dspy/main.py
@@ -2,7 +2,7 @@
 
 import dspy
 from pydantic import BaseModel, Field
-import fitz  # PyMuPDF
+import pymupdf
 
 import cocoindex
 
@@ -106,12 +106,12 @@ def extract_patient(pdf_content: bytes) -> Patient:
     """Extract patient information from PDF content."""
 
     # Convert PDF pages to DSPy Image objects
-    pdf_doc = fitz.open(stream=pdf_content, filetype="pdf")
+    pdf_doc = pymupdf.open(stream=pdf_content, filetype="pdf")
 
     form_images = []
     for page in pdf_doc:
         # Render page to pixmap (image) at 2x resolution for better quality
-        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
+        pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))
         # Convert to PNG bytes
         img_bytes = pix.tobytes("png")
         # Create DSPy Image from bytes
diff --git a/examples/patient_intake_extraction_dspy/pyproject.toml b/examples/patient_intake_extraction_dspy/pyproject.toml
@@ -7,7 +7,7 @@ dependencies = [
     "cocoindex>=0.3.9",
     "dspy-ai>=3.0.4",
     "pydantic>=2.0.0",
-    "pymupdf>=1.24.0",
+    "pymupdf>=1.26.5",
 ]
 
 [tool.setuptools]

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -7,7 +7,7 @@ dependencies = [` |
| `7` | `7` | `"cocoindex>=0.3.9",` |
| `8` | `8` | `"dspy-ai>=3.0.4",` |
| `9` | `9` | `"pydantic>=2.0.0",` |
| `10` | | `- "pymupdf>=1.24.0",` |
| | `10` | `+ "pymupdf>=1.26.5",` |
| `11` | `11` | `]` |
| `12` | `12` | |
| `13` | `13` | `[tool.setuptools]` |