From 0a0ed4d3ebde6e6fbfcfe07dbce73b16f7be0fe2 Mon Sep 17 00:00:00 2001 From: Quin Hoxie Date: Thu, 2 Oct 2025 14:27:05 -0700 Subject: [PATCH 1/2] Rework optimize in preparation for new optimizers (e.g. GEPA). Remove unnecessary flag in favor of just calling optimize() on extractor, update examples. --- README.md | 3 +- .../content/docs/examples/legal-contracts.mdx | 2 +- .../docs/examples/scientific-papers.mdx | 2 +- docs/src/content/docs/optimization.mdx | 23 ++- docs/src/content/docs/persistence.mdx | 8 +- docs/src/content/docs/query-parsing.mdx | 49 +++-- docs/src/content/docs/quickstart.mdx | 3 +- docs/src/content/docs/why-dspy.mdx | 1 - docs/src/content/docs/why-langstruct.mdx | 4 +- examples/06_rag_integration.py | 2 +- examples/07_optimization.py | 9 +- langstruct/api.py | 32 ++-- langstruct/core/persistence.py | 9 +- langstruct/core/validation.py | 4 +- langstruct/optimizers/__init__.py | 3 +- langstruct/optimizers/bootstrap.py | 55 ------ tests/conftest.py | 2 +- tests/test_api.py | 41 ++--- tests/test_integration_workflows.py | 171 ++++++++++++++++++ tests/test_persistence.py | 3 +- 20 files changed, 257 insertions(+), 169 deletions(-) delete mode 100644 langstruct/optimizers/bootstrap.py create mode 100644 tests/test_integration_workflows.py diff --git a/README.md b/README.md index 0d70d35..4294624 100644 --- a/README.md +++ b/README.md @@ -205,8 +205,7 @@ Once you've got the basics working, there's more: ```python extractor.optimize( texts=your_examples, - expected_results=expected_outputs, - num_trials=50 + expected_results=expected_outputs ) ``` diff --git a/docs/src/content/docs/examples/legal-contracts.mdx b/docs/src/content/docs/examples/legal-contracts.mdx index d9492e2..2133b16 100644 --- a/docs/src/content/docs/examples/legal-contracts.mdx +++ b/docs/src/content/docs/examples/legal-contracts.mdx @@ -66,11 +66,11 @@ Create an extractor for legal document analysis: extractor = LangStruct( schema=LegalContractSchema, model="gemini/gemini-2.5-flash-lite", # Fast and reliable for legal analysis - optimize=True, use_sources=True, # Critical for legal document traceability temperature=0.1, # Lower temperature for consistency max_retries=3 # Ensure reliability ) +# Later: extractor.optimize(training_texts, expected_results) # Example contract text contract_text = """ diff --git a/docs/src/content/docs/examples/scientific-papers.mdx b/docs/src/content/docs/examples/scientific-papers.mdx index 908de48..904be99 100644 --- a/docs/src/content/docs/examples/scientific-papers.mdx +++ b/docs/src/content/docs/examples/scientific-papers.mdx @@ -76,11 +76,11 @@ Create an extractor for research paper analysis: extractor = LangStruct( schema=ScientificPaperSchema, model="gemini/gemini-2.5-flash-lite", # Fast and reliable for academic content - optimize=True, use_sources=True, # Track where information was found temperature=0.2, # Slightly higher for nuanced interpretation max_retries=3 ) +# Later: extractor.optimize(training_texts, expected_results) # Example research paper text (excerpt) paper_text = """ diff --git a/docs/src/content/docs/optimization.mdx b/docs/src/content/docs/optimization.mdx index caa76ee..5ce19bf 100644 --- a/docs/src/content/docs/optimization.mdx +++ b/docs/src/content/docs/optimization.mdx @@ -9,28 +9,28 @@ Make your extraction more accurate with automatic optimization. 
LangStruct learns the best prompts and examples for your data automatically.

## The Easy Way

-**Enable optimization (configure optimizer) and then optimize with your data:**
+**Create an extractor (optionally choose the optimizer) and call `optimize()` when you're ready:**

```python
from langstruct import LangStruct

-# Create extractor with optimization enabled
extractor = LangStruct(
    example={
        "name": "Dr. Sarah Johnson",
        "age": 34,
        "occupation": "data scientist"
    },
-    optimize=True  # sets up optimizer; run .optimize(...) to train
+    optimizer="miprov2",  # default optimizer
)
+
+# Later, once you have training data:
+# extractor.optimize(texts=training_texts, expected_results=good_results)
```

-**Default behavior (faster startup, good baseline accuracy):**
+**Quick experiments (skip optimization entirely):**

```python
-# No optimization - good for quick experiments
extractor = LangStruct(example={"name": "John", "age": 30})
-# optimize=False by default - enables faster startup
```

## When You Have Training Data
@@ -86,7 +86,6 @@ Most users don't need this, but if you want more control:
extractor.optimize(
    texts=training_texts,
    expected_results=good_results,
-    num_trials=50,  # More trials = better results (takes longer)
    validation_split=0.3  # Use 30% for testing improvements
)
```
@@ -110,19 +109,19 @@ extractor.optimize(

## Common Questions

-**Q: Do I always need training data?**
+**Q: Do I always need training data?**
A: No! Optimization can work without training data, but providing examples improves results significantly.

-**Q: How long does optimization take?**
+**Q: How long does optimization take?**
A: Usually 1-5 minutes for typical datasets (10-100 examples).

-**Q: Can I optimize an already optimized extractor?**
+**Q: Can I optimize an already optimized extractor?**
A: Yes! You can keep optimizing with new data as you get it.

-**Q: Will this make my extractions slower?**
+**Q: Will this make my extractions slower?**
A: No - optimization happens once during training. Production extraction speed is the same.

-**Q: What happens when I switch models?**
+**Q: What happens when I switch models?**
A: Just change the model and re-optimize! Same training data, same accuracy - zero prompt rewriting needed.
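To make the model-switch answer concrete, here's a minimal sketch of the intended flow (the training data and model name are illustrative; the `extractor.model` swap mirrors the why-dspy.mdx example further down in this patch):

```python
from langstruct import LangStruct

# Illustrative training data: raw texts plus the outputs we expect
training_texts = ["John Smith is a 30-year-old engineer."]
good_results = [{"name": "John Smith", "age": 30}]

# Optimize once against the current model
extractor = LangStruct(example={"name": "John", "age": 30})
extractor.optimize(texts=training_texts, expected_results=good_results)

# Months later: point at a different provider and re-run the same optimization;
# the training data is reused and no prompts are rewritten by hand
extractor.model = "claude-3-7-sonnet-latest"
extractor.optimize(texts=training_texts, expected_results=good_results)
```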
## Next Steps diff --git a/docs/src/content/docs/persistence.mdx b/docs/src/content/docs/persistence.mdx index 0405427..48e92da 100644 --- a/docs/src/content/docs/persistence.mdx +++ b/docs/src/content/docs/persistence.mdx @@ -46,10 +46,9 @@ print(result.entities) ```python from langstruct import LangStruct -# Create extractor with optimization +# Create extractor extractor = LangStruct( example={"name": "John", "age": 30, "role": "engineer"}, - optimize=True ) # Train the extractor @@ -58,8 +57,7 @@ expected_results = [{"name": "Expected outputs..."}] extractor.optimize( texts=training_texts, - expected_results=expected_results, - num_trials=50 + expected_results=expected_results ) # Save optimized state @@ -215,7 +213,7 @@ Common error scenarios: ```python # Development: Train and save -extractor = LangStruct(schema=MySchema, optimize=True) +extractor = LangStruct(schema=MySchema) extractor.optimize(training_data, expected_results) extractor.save("./production_extractor") diff --git a/docs/src/content/docs/query-parsing.mdx b/docs/src/content/docs/query-parsing.mdx index c52d8f7..cddfa60 100644 --- a/docs/src/content/docs/query-parsing.mdx +++ b/docs/src/content/docs/query-parsing.mdx @@ -44,14 +44,14 @@ This single query contains **three distinct types of information**: - Quarter: Q3 2024 (exact match) - Revenue: > $100B (numeric comparison) - Sector: Technology (category match) - + These need **database-style filtering**, not semantic search **Conceptual topics for similarity search:** - "financial reports" (could be 10-K, earnings, statements) - "AI investments" (could be ML, artificial intelligence, neural networks) - + These need **embedding-based semantic search** @@ -59,7 +59,7 @@ This single query contains **three distinct types of information**: - "Show me" implies retrieval intent - "companies" implies corporate entities - Plural suggests multiple results expected - + These provide **query understanding context** @@ -86,14 +86,14 @@ results = vector_db.similarity_search(query_embedding) **What they are:** Conceptual topics that benefit from semantic understanding - + **Examples:** - "artificial intelligence" ≈ "AI" ≈ "machine learning" - "financial performance" ≈ "earnings" ≈ "fiscal results" - "customer satisfaction" ≈ "user happiness" ≈ "client feedback" - + **How they work:** Converted to embeddings for similarity matching - + **Best for:** - Finding conceptually related content - Handling synonyms and variations @@ -101,15 +101,15 @@ results = vector_db.similarity_search(query_embedding) **What they are:** Exact constraints that must be precisely matched - + **Examples:** - Date/Time: "Q3 2024", "after 2023", "last 30 days" - Numbers: "revenue > $100M", "5-10 employees", "top 3" - Categories: "tech sector", "approved status", "high priority" - Entities: "Apple Inc.", "California", "John Smith" - + **How they work:** Converted to database-style filter operations - + **Best for:** - Enforcing hard constraints - Filtering by exact values @@ -129,7 +129,7 @@ Let's see how different queries naturally decompose: - **Structured filters:** `{"quarter": "Q3 2024", "sector": "Technology", "profitable": true}` - **Why it matters:** You want companies that ARE profitable (filter), not just ones that DISCUSS profitability -#### Healthcare Query +#### Healthcare Query > "Patient records over 65 years old with diabetes showing improvement" - **Semantic terms:** `["showing improvement", "better outcomes"]` @@ -216,7 +216,7 @@ print("📖 Explanation:", result.explanation) 'revenue': {'$gte': 
100.0} } 💯 Confidence: 91.5% -📖 Explanation: +📖 Explanation: Searching for: tech companies With filters: • quarter = Q3 2024 @@ -270,30 +270,30 @@ class EnhancedRAGSystem: # Same schema for both extraction and parsing! self.langstruct = LangStruct(example=schema_example) self.vectorstore = Chroma(embedding_function=OpenAIEmbeddings()) - + def index_document(self, text: str): """Extract metadata and index document""" # Extract structured metadata extraction = self.langstruct.extract(text) - + # Index with both text and metadata self.vectorstore.add_texts( texts=[text], metadatas=[extraction.entities] ) - + def natural_query(self, query: str, k: int = 5): """Query using natural language""" # Parse query into components parsed = self.langstruct.query(query) - + # Perform hybrid search results = self.vectorstore.similarity_search( query=' '.join(parsed.semantic_terms), k=k, filter=parsed.structured_filters ) - + return results, parsed.explanation # Usage @@ -407,13 +407,13 @@ ls = LangStruct(example=your_schema) # Query with natural language def smart_search(query: str): parsed = ls.query(query) - + results = collection.query( query_texts=parsed.semantic_terms, where=parsed.structured_filters, n_results=10 ) - + return results ``` @@ -431,19 +431,19 @@ ls = LangStruct(example=your_schema) # Natural language query def pinecone_search(query: str): parsed = ls.query(query) - + # Convert to Pinecone filter format pinecone_filter = { - f"metadata.{k}": v + f"metadata.{k}": v for k, v in parsed.structured_filters.items() } - + results = index.query( vector=embed(parsed.semantic_terms), filter=pinecone_filter, top_k=10 ) - + return results ``` @@ -497,9 +497,8 @@ domain_ls = LangStruct( # Include synonyms in descriptions "earnings": 10.5, # Also covers "profits", "income" }, - # Can optimize for better accuracy - optimize=True ) +# Call domain_ls.optimize(...) with training examples when ready ``` ## Performance Considerations @@ -512,7 +511,7 @@ from functools import lru_cache class CachedLangStruct: def __init__(self, schema): self.ls = LangStruct(example=schema) - + @lru_cache(maxsize=1000) def query_cached(self, query: str): """Cache frequently used queries""" diff --git a/docs/src/content/docs/quickstart.mdx b/docs/src/content/docs/quickstart.mdx index e6418b8..62930af 100644 --- a/docs/src/content/docs/quickstart.mdx +++ b/docs/src/content/docs/quickstart.mdx @@ -87,8 +87,7 @@ extractor = LangStruct(example=schema) # See optimization in action extractor.optimize( texts=["training texts..."], - expected=[{"expected outputs..."}], - num_trials=50 # More trials = better accuracy + expected=[{"expected outputs..."}] ) print(f"Optimized accuracy: {extractor.score:.1%}") ``` diff --git a/docs/src/content/docs/why-dspy.mdx b/docs/src/content/docs/why-dspy.mdx index 338e96d..6936b02 100644 --- a/docs/src/content/docs/why-dspy.mdx +++ b/docs/src/content/docs/why-dspy.mdx @@ -147,7 +147,6 @@ result = extractor.extract("Microsoft announced $65B revenue for Q4") extractor = LangStruct( example={"company": "Apple", "revenue": 100.0}, model="gpt-5-mini", - optimize=True ) extractor.optimize(training_texts, expected_results) diff --git a/docs/src/content/docs/why-langstruct.mdx b/docs/src/content/docs/why-langstruct.mdx index d0c2b96..6a7c687 100644 --- a/docs/src/content/docs/why-langstruct.mdx +++ b/docs/src/content/docs/why-langstruct.mdx @@ -155,7 +155,7 @@ extractor = LangExtract(...) # Month 6: Switch to Claude - everything breaks! 
# ❌ Prompts don't work the same way -# ❌ Few-shot examples need rewriting +# ❌ Few-shot examples need rewriting # ❌ Back to manual tuning for weeks # Month 12: Move to local Llama - start over again! @@ -166,7 +166,7 @@ extractor = LangExtract(...) ### With LangStruct ```python # Month 1: Set up once -extractor = LangStruct(example=schema, optimize=True) +extractor = LangStruct(example=schema) extractor.optimize(training_data) # Month 6: Switch to Claude diff --git a/examples/06_rag_integration.py b/examples/06_rag_integration.py index b98a270..99db1c8 100644 --- a/examples/06_rag_integration.py +++ b/examples/06_rag_integration.py @@ -69,8 +69,8 @@ def __init__(self, extraction_schema: Dict[str, Any]): self.metadata_extractor = LangStruct( example=extraction_schema, # Model will use LangStruct's default unless specified - optimize=True, # Enable auto-optimization ) + # Call self.metadata_extractor.optimize(...) later with labeled data if needed except Exception as e: raise ValueError( f"Failed to initialize LangStruct: {e}. " diff --git a/examples/07_optimization.py b/examples/07_optimization.py index 980c3a1..c3528bf 100644 --- a/examples/07_optimization.py +++ b/examples/07_optimization.py @@ -27,8 +27,9 @@ def main(): print("=" * 40) try: - # Step 1: Create extractor with optimization enabled - print("\n1️⃣ Creating extractor with auto-optimization...") + # Step 1: Create extractor + print() + print("1️⃣ Creating extractor...") extractor = LangStruct( example={ "person_name": "Dr. Sarah Johnson", @@ -36,9 +37,8 @@ def main(): "years_experience": 8, "specialization": "interventional cardiology", }, - optimize=True, # Enable optimization for better accuracy ) - print("✅ Extractor created with optimization enabled!") + print("✅ Extractor ready! Call optimize() once you have training data.") # Step 2: Initial extraction (baseline) print("\n2️⃣ Baseline extraction...") @@ -143,7 +143,6 @@ def main(): extractor.optimize( texts=training_texts, expected_results=expected_results, - num_trials=10, # More trials → better results (higher cost) ) did_optimize = True print(" ✅ Optimization complete!") diff --git a/langstruct/api.py b/langstruct/api.py index 2d504de..17d6984 100644 --- a/langstruct/api.py +++ b/langstruct/api.py @@ -22,7 +22,6 @@ PersistenceError, ValidationError, ) -from .optimizers.bootstrap import BootstrapOptimizer from .optimizers.metrics import ExtractionMetrics from .optimizers.mipro import MIPROv2Optimizer from .parallel import ParallelProcessor, ProcessingResult @@ -64,7 +63,6 @@ def __init__( self, schema: Optional[Type[Schema]] = None, model: Optional[Union[str, dspy.LM]] = None, - optimize: bool = False, optimizer: str = "miprov2", chunking_config: Optional[ChunkingConfig] = None, use_sources: bool = True, @@ -84,8 +82,7 @@ def __init__( schema: Pydantic schema defining the extraction structure (optional) model: Model name or DSPy LM instance (defaults to "gpt-5-mini"; pass "gpt-5-mini"/"gpt-5-pro" for the latest OpenAI models) - optimize: Whether to use automatic prompt optimization (default: False) - optimizer: Optimizer to use ("miprov2", "bootstrap") + optimizer: Optimizer to use when optimize() runs (default: "miprov2") chunking_config: Configuration for text chunking use_sources: Whether to include source grounding (default: True) example: Single example dict for auto schema generation (optional) @@ -128,7 +125,6 @@ def __init__( schema = ensure_schema_class(schema) self.schema = schema - self.optimize = optimize self.optimizer_name = optimizer self.chunking_config 
= chunking_config or ChunkingConfig() self.use_sources = use_sources @@ -166,7 +162,8 @@ def __init__( # Initialize the extraction pipeline (robust to monkeypatched constructors) pipeline_cls = core_modules.ExtractionPipeline try: - sig = inspect.signature(pipeline_cls) + # Inspect __init__ directly to get actual parameters (not dspy.Module's *args, **kwargs) + sig = inspect.signature(pipeline_cls.__init__) except (TypeError, ValueError): # Fallback if signature can't be inspected (e.g., C-extensions or mocks) sig = None @@ -198,10 +195,8 @@ def __init__( except TypeError: self.pipeline = pipeline_cls(schema) - # Initialize optimizer if requested + # Optimizer is created lazily when optimize() is called self.optimizer = None - if optimize: - self._initialize_optimizer() # Initialize refinement engine if requested self.refinement_engine = None @@ -509,7 +504,13 @@ def _extract_single( else self.refine_config ) - if effective_refine and self.refinement_engine: + if effective_refine: + # Lazily initialize refinement engine if not already created + if self.refinement_engine is None: + self.refinement_engine = RefinementEngine( + self.schema, self.pipeline.extractor + ) + # Run refinement process refined_result, trace = self.refinement_engine(text, effective_refine) result = refined_result @@ -580,7 +581,6 @@ def optimize( self, texts: List[str], expected_results: Optional[List[Dict]] = None, - num_trials: int = 20, validation_split: float = 0.2, ) -> "LangStruct": """Optimize extraction performance on provided data. @@ -588,7 +588,6 @@ def optimize( Args: texts: Training texts for optimization expected_results: Optional ground truth results for supervised optimization - num_trials: Number of optimization trials to run validation_split: Fraction of data to use for validation Returns: @@ -619,7 +618,6 @@ def optimize( val_texts=val_texts or train_texts, # Use train if no val data train_expected=train_expected, val_expected=val_expected, - num_trials=num_trials, ) self.pipeline = optimized_pipeline @@ -802,7 +800,7 @@ def save(self, path: str) -> None: path: Directory path to save the extractor to (will be created if needed) Example: - >>> extractor = LangStruct(schema=PersonSchema, optimize=True) + >>> extractor = LangStruct(schema=PersonSchema) >>> extractor.optimize(train_texts, expected_results) >>> extractor.save("./my_extractor") >>> # Creates directory with all extractor components @@ -847,11 +845,9 @@ def _initialize_optimizer(self) -> None: """Initialize the appropriate optimizer.""" if self.optimizer_name.lower() == "miprov2": self.optimizer = MIPROv2Optimizer() - elif self.optimizer_name.lower() == "bootstrap": - self.optimizer = BootstrapOptimizer() else: raise ValueError( - f"Unknown optimizer: {self.optimizer_name}. Supported optimizers: miprov2, bootstrap" + f"Unknown optimizer: {self.optimizer_name}. Only 'miprov2' is supported." 
) def _parse_refine_config( @@ -1114,5 +1110,5 @@ def __repr__(self) -> str: return ( f"LangStruct(schema={self.schema.__name__}, " f"model={self.lm.__class__.__name__}, " - f"optimize={self.optimize})" + f"optimizer_initialized={self.optimizer is not None})" ) diff --git a/langstruct/core/persistence.py b/langstruct/core/persistence.py index 2606829..3d5a623 100644 --- a/langstruct/core/persistence.py +++ b/langstruct/core/persistence.py @@ -145,11 +145,10 @@ def load_extractor(cls, path: Union[str, Path]) -> "LangStruct": extractor = LangStruct( schema=schema_class, model=metadata.model_name, - optimize=False, # We'll handle optimization separately chunking_config=chunking_config, use_sources=metadata.use_sources, **metadata.lm_config, - ) + ) # Optimizer state restored separately except Exception as e: raise PersistenceError( f"Failed to recreate LangStruct instance. This may be due to missing API keys, " @@ -445,12 +444,6 @@ def _restore_optimizer_state( num_threads=optimizer_state.get("num_threads", 4), **optimizer_state.get("kwargs", {}), ) - elif optimizer_name == "bootstrap": - from ..optimizers.bootstrap import BootstrapOptimizer - - extractor.optimizer = BootstrapOptimizer( - **optimizer_state.get("kwargs", {}) - ) @classmethod def _restore_refinement_state( diff --git a/langstruct/core/validation.py b/langstruct/core/validation.py index b2934d8..42b0a03 100644 --- a/langstruct/core/validation.py +++ b/langstruct/core/validation.py @@ -452,7 +452,9 @@ def _generate_suggestions(self, issues: List[ValidationIssue]) -> List[str]: if IssueType.LOW_CONFIDENCE in issue_types: suggestions.append("🎯 Try a more powerful model (e.g. gpt-5-mini)") suggestions.append("📝 Add more detailed field descriptions") - suggestions.append("🔄 Enable auto-optimization with optimize=True") + suggestions.append( + "🔄 Run extractor.optimize(...) with representative data" + ) if IssueType.MISSING_FIELDS in issue_types: suggestions.append("❓ Make optional fields Optional[type] in schema") diff --git a/langstruct/optimizers/__init__.py b/langstruct/optimizers/__init__.py index a79255f..e4209de 100644 --- a/langstruct/optimizers/__init__.py +++ b/langstruct/optimizers/__init__.py @@ -1,7 +1,6 @@ """Optimization functionality using DSPy optimizers.""" -from .bootstrap import BootstrapOptimizer from .metrics import ExtractionMetrics from .mipro import MIPROv2Optimizer -__all__ = ["MIPROv2Optimizer", "BootstrapOptimizer", "ExtractionMetrics"] +__all__ = ["MIPROv2Optimizer", "ExtractionMetrics"] diff --git a/langstruct/optimizers/bootstrap.py b/langstruct/optimizers/bootstrap.py deleted file mode 100644 index 7599ffc..0000000 --- a/langstruct/optimizers/bootstrap.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Bootstrap optimizer for few-shot example generation.""" - -import logging -from typing import Any, Dict, List, Optional - -from ..core.modules import ExtractionPipeline - -logger = logging.getLogger(__name__) - - -class BootstrapOptimizer: - """DSPy Bootstrap optimizer for generating few-shot examples.""" - - def __init__(self, max_bootstrapped_demos: int = 8, max_labeled_demos: int = 16): - """Initialize Bootstrap optimizer. 
- - Args: - max_bootstrapped_demos: Maximum number of bootstrapped examples - max_labeled_demos: Maximum number of labeled examples to use - """ - self.max_bootstrapped_demos = max_bootstrapped_demos - self.max_labeled_demos = max_labeled_demos - - def optimize( - self, - pipeline: ExtractionPipeline, - train_texts: List[str], - val_texts: List[str], - train_expected: Optional[List[Dict]] = None, - val_expected: Optional[List[Dict]] = None, - num_trials: int = 20, - ) -> ExtractionPipeline: - """Optimize extraction pipeline using Bootstrap few-shot learning. - - Args: - pipeline: Extraction pipeline to optimize - train_texts: Training texts - val_texts: Validation texts - train_expected: Expected results for training (optional) - val_expected: Expected results for validation (optional) - num_trials: Number of optimization trials - - Returns: - Optimized extraction pipeline - """ - # TODO: Implement Bootstrap optimization - # This will use DSPy's BootstrapFewShot to automatically - # generate good few-shot examples - - logger.info("Bootstrap optimization not yet implemented") - logger.info("Would bootstrap %d examples", self.max_bootstrapped_demos) - logger.info("From %d training examples", len(train_texts)) - - # For now, return the original pipeline - return pipeline diff --git a/tests/conftest.py b/tests/conftest.py index 258ff9d..5f2ac28 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -149,7 +149,7 @@ def setup_test_environment(): import dspy if GOOGLE_API_KEY: - dspy.configure(lm=dspy.LM("gemini/gemini-2.5-flash")) + dspy.configure(lm=dspy.LM("gemini/gemini-2.5-flash-lite")) print(f"\n✅ Running tests with Gemini 2.5 Flash") elif OPENAI_API_KEY: dspy.configure(lm=dspy.LM("openai/gpt-4o-mini")) diff --git a/tests/test_api.py b/tests/test_api.py index 9d1e8f9..0b30ff0 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,16 +21,14 @@ def test_basic_initialization(self, person_schema, mock_extraction_pipeline): assert issubclass(extractor.schema, person_schema) assert extractor.schema is not person_schema assert extractor.use_sources is True # Default - assert extractor.optimize is False # Default + assert extractor.optimizer is None def test_initialization_with_options(self, person_schema, mock_extraction_pipeline): """Test LangStruct initialization with custom options.""" - extractor = LangStruct( - schema=person_schema, model="gpt-4o", optimize=True, use_sources=False - ) + extractor = LangStruct(schema=person_schema, model="gpt-4o", use_sources=False) assert issubclass(extractor.schema, person_schema) - assert extractor.optimize is True + assert extractor.optimizer is None assert extractor.use_sources is False @integration_test @@ -155,7 +153,7 @@ def test_constructor_with_schema(self, person_schema, mock_extraction_pipeline): extractor = LangStruct(schema=person_schema) assert issubclass(extractor.schema, person_schema) - assert extractor.optimize is False # Default behavior + assert extractor.optimizer is None assert extractor.use_sources is True # Should be enabled by auto def test_schema_wrapping_enforces_extra_forbid(self, mock_extraction_pipeline): @@ -204,7 +202,7 @@ def test_constructor_with_example( extractor = LangStruct(example=person_example_data) assert extractor.schema is not None - assert extractor.optimize is False # Default behavior + assert extractor.optimizer is None assert extractor.use_sources is True def test_constructor_no_input(self, mock_extraction_pipeline): @@ -308,12 +306,12 @@ def test_source_grounding_override( def test_repr(self, 
person_schema, mock_extraction_pipeline): """Test __repr__ method.""" - extractor = LangStruct(schema=person_schema, optimize=True) + extractor = LangStruct(schema=person_schema) repr_str = repr(extractor) assert "LangStruct" in repr_str assert "PersonSchema" in repr_str - assert "optimize=True" in repr_str + assert "optimizer_initialized=False" in repr_str def test_save_load_basic_functionality( self, person_schema, mock_extraction_pipeline @@ -342,23 +340,14 @@ def test_save_load_basic_functionality( assert loaded is not None assert issubclass(loaded.schema, person_schema) - def test_optimization_setup(self, person_schema, mock_extraction_pipeline): - """Test optimizer initialization.""" - # Test with MIPROv2 - extractor1 = LangStruct( - schema=person_schema, optimize=True, optimizer="miprov2" - ) - assert extractor1.optimizer is not None - - # Test with Bootstrap - extractor2 = LangStruct( - schema=person_schema, optimize=True, optimizer="bootstrap" - ) - assert extractor2.optimizer is not None + def test_optimize_raises_for_invalid_optimizer( + self, person_schema, mock_extraction_pipeline + ): + """Ensure invalid optimizer names raise when optimization runs.""" + extractor = LangStruct(schema=person_schema, optimizer="invalid") - # Test with invalid optimizer with pytest.raises(ValueError, match="Unknown optimizer"): - LangStruct(schema=person_schema, optimize=True, optimizer="invalid") + extractor.optimize(["text"]) def test_optimization_default_disabled( self, person_schema, mock_extraction_pipeline @@ -367,7 +356,7 @@ def test_optimization_default_disabled( extractor = LangStruct(schema=person_schema) # Optimization should be disabled by default now - assert extractor.optimize is False + assert extractor.optimizer is None def test_evaluate_placeholder(self, person_schema, mock_extraction_pipeline): """Test evaluate method (currently placeholder).""" @@ -523,7 +512,7 @@ def test_auto_configuration_workflow(self, mock_extraction_pipeline): extractor = LangStruct(example=example) # Verify default settings - assert extractor.optimize is False # Default behavior + assert extractor.optimizer is None assert extractor.use_sources is True # Should work for extraction diff --git a/tests/test_integration_workflows.py b/tests/test_integration_workflows.py new file mode 100644 index 0000000..9ca542b --- /dev/null +++ b/tests/test_integration_workflows.py @@ -0,0 +1,171 @@ +"""Slow integration tests that hit real LLM providers when API keys are configured.""" + +from __future__ import annotations + +import json +from typing import Dict, List, Tuple + +import pytest + +from langstruct import LangStruct +from langstruct.core.chunking import ChunkingConfig +from langstruct.core.refinement import Budget, Refine + + +@pytest.fixture(scope="module") +def optimization_dataset() -> Tuple[List[str], List[Dict[str, object]]]: + """Provide lightweight training data for integration optimization runs.""" + texts = [ + """\ + Alice Johnson is a 29-year-old data scientist based in Seattle, Washington. + She leads the analytics team at BlueSky Labs and mentors junior engineers. 
+ """.strip(), + ] + + labels = [ + {"name": "Alice Johnson", "age": 29, "location": "Seattle, Washington"}, + ] + + return texts, labels + + +@pytest.fixture +def optimized_person_extractor( + person_schema, + optimization_dataset, + requires_api_key, +): + """Create a LangStruct instance that has been optimized against the dataset.""" + texts, labels = optimization_dataset + + extractor = LangStruct( + schema=person_schema, + optimizer="miprov2", + use_sources=False, # keep requests smaller for integration runs + ) + + extractor.optimize( + texts=texts, + expected_results=labels, + validation_split=0.0, + ) + + return { + "extractor": extractor, + "train_texts": texts, + "expected_results": labels, + } + + +@pytest.mark.integration +def test_integration_optimize_smoke(optimized_person_extractor): + """End-to-end smoke test covering optimize() and extraction afterwards.""" + bundle = optimized_person_extractor + extractor: LangStruct = bundle["extractor"] + + test_text = ( + "Dr. Emily Davis is a 38-year-old physician based in Austin, Texas, " + "where she leads the cardiology program at Central Health." + ) + + result = extractor.extract(test_text, validate=False, return_sources=False) + + assert isinstance(result.entities, dict) + assert extractor.optimizer is not None + assert getattr(extractor.optimizer, "optimizer", None) is not None + assert 0.0 <= result.confidence <= 1.0 + assert any(str(v).strip() for v in result.entities.values()) + assert result.metadata.get("pipeline") == "langstruct" + + +@pytest.mark.integration +def test_integration_save_load_after_optimization(optimized_person_extractor, tmp_path): + """Ensure optimized extractors persist and reload correctly.""" + bundle = optimized_person_extractor + extractor: LangStruct = bundle["extractor"] + texts: List[str] = bundle["train_texts"] + + save_path = tmp_path / "optimized_extractor" + extractor.save(str(save_path)) + + metadata_path = save_path / "langstruct_metadata.json" + with metadata_path.open("r", encoding="utf-8") as fh: + metadata = json.load(fh) + + assert metadata["optimization_applied"] is True + assert metadata["optimizer_name"] == "miprov2" + + loaded = LangStruct.load(str(save_path)) + loaded_result = loaded.extract(texts[0], validate=False, return_sources=False) + + assert isinstance(loaded_result.entities, dict) + assert loaded.optimizer is not None + assert any(str(v).strip() for v in loaded_result.entities.values()) + + +@pytest.mark.integration +def test_integration_chunked_sources(person_schema, requires_api_key): + """Validate extraction with source grounding across multiple chunks.""" + chunk_config = ChunkingConfig( + max_tokens=12, + overlap_tokens=4, + min_chunk_tokens=3, + preserve_paragraphs=False, + preserve_sentences=False, + ) + + extractor = LangStruct(schema=person_schema, chunking_config=chunk_config) + + long_text = ( + "Charlotte Rivera is a 41-year-old neurologist based in San Diego, " + "California. She leads the neuroscience unit at Horizon Medical Center. " + "Outside of work, Charlotte mentors students at the local university." 
+ ) + + result = extractor.extract(long_text, validate=False, return_sources=True) + + assert isinstance(result.entities, dict) + assert result.sources + assert result.metadata.get("total_chunks", 1) > 1 + assert any(spans for spans in result.sources.values()) + + +@pytest.mark.integration +def test_integration_query_parsing(person_schema, requires_api_key): + """Ensure query() returns structured output using the query parser.""" + extractor = LangStruct(schema=person_schema) + + query = "cardiologists in Seattle over 30" + parsed = extractor.query(query, explain=False) + + assert parsed.raw_query == query + assert 0.0 <= parsed.confidence <= 1.0 + assert parsed.metadata.get("parsed_by") == "llm" + + +@pytest.mark.integration +def test_integration_refinement_flow(person_schema, requires_api_key): + """Exercise refinement engine with conservative budget to limit cost.""" + refine_config = Refine( + strategy="bon", + n_candidates=1, + max_refine_steps=1, + temperature=0.3, + budget=Budget(max_calls=1), + ) + + extractor = LangStruct( + schema=person_schema, + refine=refine_config, + use_sources=False, + ) + + text = ( + "Dr. Olivia Chen is a 36-year-old cardiologist working at Bayview Medical " + "Center in San Francisco, California." + ) + + result = extractor.extract(text, validate=False, return_sources=False) + + assert result.metadata.get("refinement_applied") + assert result.metadata.get("refinement_strategy") == refine_config.strategy diff --git a/tests/test_persistence.py b/tests/test_persistence.py index f539110..e9e0e64 100644 --- a/tests/test_persistence.py +++ b/tests/test_persistence.py @@ -369,7 +369,8 @@ def test_save_with_refinement_config(self): def test_metadata_with_optimization_flag(self): """Test that optimization flag is correctly saved in metadata.""" - extractor = LangStruct(example={"name": "Alice", "age": 30}, optimize=True) + extractor = LangStruct(example={"name": "Alice", "age": 30}) + extractor.optimizer = object() # simulate optimization having run with tempfile.TemporaryDirectory() as temp_dir: save_path = Path(temp_dir) / "test_extractor" From 37552044deb56f4503d5396e8a67be50cc3baf75 Mon Sep 17 00:00:00 2001 From: Quin Hoxie Date: Thu, 2 Oct 2025 14:40:39 -0700 Subject: [PATCH 2/2] Docs updates to clarify optimize usage. --- docs/src/content/docs/optimization.mdx | 25 ++++++++++++++++++------- docs/src/content/docs/quickstart.mdx | 3 +-- docs/src/content/docs/why-dspy.mdx | 8 ++++---- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/docs/src/content/docs/optimization.mdx b/docs/src/content/docs/optimization.mdx index 5ce19bf..c70ef38 100644 --- a/docs/src/content/docs/optimization.mdx +++ b/docs/src/content/docs/optimization.mdx @@ -74,8 +74,19 @@ Optimization can significantly improve accuracy on real-world tasks: ## Persisting Results -Saving/loading an optimized extractor is not yet implemented. -For now, re-run `optimize()` when you start up, or persist your training data and configuration. +Save and load optimized extractors to reuse them without re-running optimization: + +```python +# Save after optimization +extractor.save("./my_extractor") + +# Load later +from langstruct import LangStruct +loaded = LangStruct.load("./my_extractor") + +# Use immediately - optimization is preserved +result = loaded.extract("new text") +``` ## Advanced (If You Need It) @@ -110,25 +121,25 @@ extractor.optimize( ## Common Questions **Q: Do I always need training data?** -A: No! 
Optimization can work without training data, but providing examples improves results significantly. +A: You need example texts, but not necessarily expected outputs. If you don't provide `expected_results`, LangStruct uses the LLM's confidence ratings to optimize. Providing expected outputs significantly improves accuracy. **Q: How long does optimization take?** A: Usually 1-5 minutes for typical datasets (10-100 examples). **Q: Can I optimize an already optimized extractor?** -A: Yes! You can keep optimizing with new data as you get it. +A: Yes, you can continue optimizing with new data as you collect it. **Q: Will this make my extractions slower?** -A: No - optimization happens once during training. Production extraction speed is the same. +A: No - optimization happens once during training. Production extraction speed is unchanged. **Q: What happens when I switch models?** -A: Just change the model and re-optimize! Same training data, same accuracy - zero prompt rewriting needed. +A: Change the model and re-optimize with the same training data. No prompt rewriting needed. ## Next Steps - Create a LangStruct extractor and enable optimization when you need accuracy! + Create a LangStruct extractor and enable optimization when you need accuracy. [Track where information comes from](/source-grounding/) diff --git a/docs/src/content/docs/quickstart.mdx b/docs/src/content/docs/quickstart.mdx index 62930af..7195e42 100644 --- a/docs/src/content/docs/quickstart.mdx +++ b/docs/src/content/docs/quickstart.mdx @@ -87,9 +87,8 @@ extractor = LangStruct(example=schema) # See optimization in action extractor.optimize( texts=["training texts..."], - expected=[{"expected outputs..."}] + expected_results=[{"expected outputs..."}] # Optional - uses confidence if omitted ) -print(f"Optimized accuracy: {extractor.score:.1%}") ``` ## Process Multiple Documents (with quotas) diff --git a/docs/src/content/docs/why-dspy.mdx b/docs/src/content/docs/why-dspy.mdx index 6936b02..ffe40cf 100644 --- a/docs/src/content/docs/why-dspy.mdx +++ b/docs/src/content/docs/why-dspy.mdx @@ -119,7 +119,7 @@ extractor = LangStruct(example={ # 2. Let MIPROv2 optimize prompts and examples automatically extractor.optimize( - training_texts=["Apple reported $125B in Q3...", "Meta earned $40B..."], + texts=["Apple reported $125B in Q3...", "Meta earned $40B..."], expected_results=[ {"company": "Apple", "revenue": 125.0, "quarter": "Q3"}, {"company": "Meta", "revenue": 40.0, "quarter": "Q3"} @@ -148,15 +148,15 @@ extractor = LangStruct( example={"company": "Apple", "revenue": 100.0}, model="gpt-5-mini", ) -extractor.optimize(training_texts, expected_results) +extractor.optimize(texts=training_texts, expected_results=expected_results) # 6 months later, switch to Claude - just two lines! extractor.model = "claude-3-7-sonnet-latest" -extractor.optimize(training_texts, expected_results) # Auto-reoptimizes prompts +extractor.optimize(texts=training_texts, expected_results=expected_results) # Auto-reoptimizes prompts # Or use local models for privacy extractor.model = "ollama/llama3.2" -extractor.optimize(training_texts, expected_results) # Works the same way +extractor.optimize(texts=training_texts, expected_results=expected_results) # Works the same way # Same accuracy, zero prompt rewriting, zero vendor lock-in ```
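A closing usage note for reviewers: the reworked Q&A states that `optimize()` can run without `expected_results` by scoring candidates on the LLM's confidence ratings. A minimal sketch of that unsupervised path (schema fields and texts are illustrative, borrowed from the why-dspy.mdx examples above):

```python
from langstruct import LangStruct

extractor = LangStruct(example={"company": "Apple", "revenue": 100.0})

# No expected_results given: optimization falls back to confidence-based scoring
extractor.optimize(texts=[
    "Apple reported $125B in Q3...",
    "Meta earned $40B...",
])

result = extractor.extract("Microsoft announced $65B revenue for Q4")
print(result.entities)
```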