diff --git a/README.md b/README.md
index 0d70d35..4294624 100644
--- a/README.md
+++ b/README.md
@@ -205,8 +205,7 @@ Once you've got the basics working, there's more:
```python
extractor.optimize(
texts=your_examples,
- expected_results=expected_outputs,
- num_trials=50
+ expected_results=expected_outputs
)
```
diff --git a/docs/src/content/docs/examples/legal-contracts.mdx b/docs/src/content/docs/examples/legal-contracts.mdx
index d9492e2..2133b16 100644
--- a/docs/src/content/docs/examples/legal-contracts.mdx
+++ b/docs/src/content/docs/examples/legal-contracts.mdx
@@ -66,11 +66,11 @@ Create an extractor for legal document analysis:
extractor = LangStruct(
schema=LegalContractSchema,
model="gemini/gemini-2.5-flash-lite", # Fast and reliable for legal analysis
- optimize=True,
use_sources=True, # Critical for legal document traceability
temperature=0.1, # Lower temperature for consistency
max_retries=3 # Ensure reliability
)
+# Later: extractor.optimize(texts=training_texts, expected_results=expected_results)
# Example contract text
contract_text = """
diff --git a/docs/src/content/docs/examples/scientific-papers.mdx b/docs/src/content/docs/examples/scientific-papers.mdx
index 908de48..904be99 100644
--- a/docs/src/content/docs/examples/scientific-papers.mdx
+++ b/docs/src/content/docs/examples/scientific-papers.mdx
@@ -76,11 +76,11 @@ Create an extractor for research paper analysis:
extractor = LangStruct(
schema=ScientificPaperSchema,
model="gemini/gemini-2.5-flash-lite", # Fast and reliable for academic content
- optimize=True,
use_sources=True, # Track where information was found
temperature=0.2, # Slightly higher for nuanced interpretation
max_retries=3
)
+# Later: extractor.optimize(texts=training_texts, expected_results=expected_results)
# Example research paper text (excerpt)
paper_text = """
diff --git a/docs/src/content/docs/optimization.mdx b/docs/src/content/docs/optimization.mdx
index caa76ee..c70ef38 100644
--- a/docs/src/content/docs/optimization.mdx
+++ b/docs/src/content/docs/optimization.mdx
@@ -9,28 +9,28 @@ Make your extraction more accurate with automatic optimization. LangStruct learn
## The Easy Way
-**Enable optimization (configure optimizer) and then optimize with your data:**
+**Create an extractor (optionally choose the optimizer) and call `optimize()` when you're ready:**
```python
from langstruct import LangStruct
-# Create extractor with optimization enabled
extractor = LangStruct(
example={
"name": "Dr. Sarah Johnson",
"age": 34,
"occupation": "data scientist"
},
- optimize=True # sets up optimizer; run .optimize(...) to train
+ optimizer="miprov2", # default optimizer
)
+
+# Later, once you have training data:
+# extractor.optimize(texts=training_texts, expected_results=good_results)
```
-**Default behavior (faster startup, good baseline accuracy):**
+**Quick experiments (skip optimization entirely):**
```python
-# No optimization - good for quick experiments
extractor = LangStruct(example={"name": "John", "age": 30})
-# optimize=False by default - enables faster startup
```
## When You Have Training Data
@@ -74,8 +74,19 @@ Optimization can significantly improve accuracy on real-world tasks:
## Persisting Results
-Saving/loading an optimized extractor is not yet implemented.
-For now, re-run `optimize()` when you start up, or persist your training data and configuration.
+Save and load optimized extractors to reuse them without re-running optimization:
+
+```python
+# Save after optimization
+extractor.save("./my_extractor")
+
+# Load later
+from langstruct import LangStruct
+loaded = LangStruct.load("./my_extractor")
+
+# Use immediately - optimization is preserved
+result = loaded.extract("new text")
+```
## Advanced (If You Need It)
@@ -86,7 +97,6 @@ Most users don't need this, but if you want more control:
extractor.optimize(
texts=training_texts,
expected_results=good_results,
- num_trials=50, # More trials = better results (takes longer)
validation_split=0.3 # Use 30% for testing improvements
)
```
@@ -110,26 +120,26 @@ extractor.optimize(
## Common Questions
-**Q: Do I always need training data?**
-A: No! Optimization can work without training data, but providing examples improves results significantly.
+**Q: Do I always need training data?**
+A: You need example texts, but not necessarily expected outputs. If you don't provide `expected_results`, LangStruct uses the LLM's confidence ratings to optimize. Providing expected outputs significantly improves accuracy.
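+
+A label-free run is just the texts (a sketch, reusing `extractor` and `training_texts` from above):
+
+```python
+# Without expected_results, optimization is guided by the LLM's confidence ratings
+extractor.optimize(texts=training_texts)
+```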
-**Q: How long does optimization take?**
+**Q: How long does optimization take?**
A: Usually 1-5 minutes for typical datasets (10-100 examples).
-**Q: Can I optimize an already optimized extractor?**
-A: Yes! You can keep optimizing with new data as you get it.
+**Q: Can I optimize an already optimized extractor?**
+A: Yes, you can continue optimizing with new data as you collect it.
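+
+For example (a sketch; `new_texts` and `new_labels` stand in for whatever data you collect later):
+
+```python
+# Calling optimize() again on the same extractor incorporates the new data
+extractor.optimize(texts=new_texts, expected_results=new_labels)
+```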
-**Q: Will this make my extractions slower?**
-A: No - optimization happens once during training. Production extraction speed is the same.
+**Q: Will this make my extractions slower?**
+A: No - optimization happens once during training. Production extraction speed is unchanged.
-**Q: What happens when I switch models?**
-A: Just change the model and re-optimize! Same training data, same accuracy - zero prompt rewriting needed.
+**Q: What happens when I switch models?**
+A: Change the model and re-optimize with the same training data. No prompt rewriting needed.
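+
+A sketch of the swap (model name illustrative; same `training_texts`/`good_results` as above):
+
+```python
+extractor.model = "claude-3-7-sonnet-latest"
+extractor.optimize(texts=training_texts, expected_results=good_results)
+```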
## Next Steps
- Create a LangStruct extractor and enable optimization when you need accuracy!
+ Create a LangStruct extractor and call `optimize()` when you need accuracy.
[Track where information comes from](/source-grounding/)
diff --git a/docs/src/content/docs/persistence.mdx b/docs/src/content/docs/persistence.mdx
index 0405427..48e92da 100644
--- a/docs/src/content/docs/persistence.mdx
+++ b/docs/src/content/docs/persistence.mdx
@@ -46,10 +46,9 @@ print(result.entities)
```python
from langstruct import LangStruct
-# Create extractor with optimization
+# Create extractor
extractor = LangStruct(
example={"name": "John", "age": 30, "role": "engineer"},
- optimize=True
)
# Train the extractor
@@ -58,8 +57,7 @@ expected_results = [{"name": "Expected outputs..."}]
extractor.optimize(
texts=training_texts,
- expected_results=expected_results,
- num_trials=50
+ expected_results=expected_results
)
# Save optimized state
@@ -215,7 +213,7 @@ Common error scenarios:
```python
# Development: Train and save
-extractor = LangStruct(schema=MySchema, optimize=True)
+extractor = LangStruct(schema=MySchema)
extractor.optimize(training_data, expected_results)
extractor.save("./production_extractor")
diff --git a/docs/src/content/docs/query-parsing.mdx b/docs/src/content/docs/query-parsing.mdx
index c52d8f7..cddfa60 100644
--- a/docs/src/content/docs/query-parsing.mdx
+++ b/docs/src/content/docs/query-parsing.mdx
@@ -44,14 +44,14 @@ This single query contains **three distinct types of information**:
- Quarter: Q3 2024 (exact match)
- Revenue: > $100B (numeric comparison)
- Sector: Technology (category match)
-
+
These need **database-style filtering**, not semantic search
**Conceptual topics for similarity search:**
- "financial reports" (could be 10-K, earnings, statements)
- "AI investments" (could be ML, artificial intelligence, neural networks)
-
+
These need **embedding-based semantic search**
@@ -59,7 +59,7 @@ This single query contains **three distinct types of information**:
- "Show me" implies retrieval intent
- "companies" implies corporate entities
- Plural suggests multiple results expected
-
+
These provide **query understanding context**
@@ -86,14 +86,14 @@ results = vector_db.similarity_search(query_embedding)
**What they are:** Conceptual topics that benefit from semantic understanding
-
+
**Examples:**
- "artificial intelligence" ≈ "AI" ≈ "machine learning"
- "financial performance" ≈ "earnings" ≈ "fiscal results"
- "customer satisfaction" ≈ "user happiness" ≈ "client feedback"
-
+
**How they work:** Converted to embeddings for similarity matching
-
+
**Best for:**
- Finding conceptually related content
- Handling synonyms and variations
@@ -101,15 +101,15 @@ results = vector_db.similarity_search(query_embedding)
**What they are:** Exact constraints that must be precisely matched
-
+
**Examples:**
- Date/Time: "Q3 2024", "after 2023", "last 30 days"
- Numbers: "revenue > $100M", "5-10 employees", "top 3"
- Categories: "tech sector", "approved status", "high priority"
- Entities: "Apple Inc.", "California", "John Smith"
-
+
**How they work:** Converted to database-style filter operations
-
+
**Best for:**
- Enforcing hard constraints
- Filtering by exact values
@@ -129,7 +129,7 @@ Let's see how different queries naturally decompose:
- **Structured filters:** `{"quarter": "Q3 2024", "sector": "Technology", "profitable": true}`
- **Why it matters:** You want companies that ARE profitable (filter), not just ones that DISCUSS profitability
-#### Healthcare Query
+#### Healthcare Query
> "Patient records over 65 years old with diabetes showing improvement"
- **Semantic terms:** `["showing improvement", "better outcomes"]`
@@ -216,7 +216,7 @@ print("📖 Explanation:", result.explanation)
'revenue': {'$gte': 100.0}
}
💯 Confidence: 91.5%
-📖 Explanation:
+📖 Explanation:
Searching for: tech companies
With filters:
• quarter = Q3 2024
@@ -270,30 +270,30 @@ class EnhancedRAGSystem:
# Same schema for both extraction and parsing!
self.langstruct = LangStruct(example=schema_example)
self.vectorstore = Chroma(embedding_function=OpenAIEmbeddings())
-
+
def index_document(self, text: str):
"""Extract metadata and index document"""
# Extract structured metadata
extraction = self.langstruct.extract(text)
-
+
# Index with both text and metadata
self.vectorstore.add_texts(
texts=[text],
metadatas=[extraction.entities]
)
-
+
def natural_query(self, query: str, k: int = 5):
"""Query using natural language"""
# Parse query into components
parsed = self.langstruct.query(query)
-
+
# Perform hybrid search
results = self.vectorstore.similarity_search(
query=' '.join(parsed.semantic_terms),
k=k,
filter=parsed.structured_filters
)
-
+
return results, parsed.explanation
# Usage
@@ -407,13 +407,13 @@ ls = LangStruct(example=your_schema)
# Query with natural language
def smart_search(query: str):
parsed = ls.query(query)
-
+
results = collection.query(
query_texts=parsed.semantic_terms,
where=parsed.structured_filters,
n_results=10
)
-
+
return results
```
@@ -431,19 +431,19 @@ ls = LangStruct(example=your_schema)
# Natural language query
def pinecone_search(query: str):
parsed = ls.query(query)
-
+
# Convert to Pinecone filter format
pinecone_filter = {
- f"metadata.{k}": v
+ f"metadata.{k}": v
for k, v in parsed.structured_filters.items()
}
-
+
results = index.query(
vector=embed(parsed.semantic_terms),
filter=pinecone_filter,
top_k=10
)
-
+
return results
```
@@ -497,9 +497,8 @@ domain_ls = LangStruct(
# Include synonyms in descriptions
"earnings": 10.5, # Also covers "profits", "income"
},
- # Can optimize for better accuracy
- optimize=True
)
+# Call domain_ls.optimize(...) with training examples when ready
```
## Performance Considerations
@@ -512,7 +511,7 @@ from functools import lru_cache
class CachedLangStruct:
def __init__(self, schema):
self.ls = LangStruct(example=schema)
-
+
@lru_cache(maxsize=1000)
def query_cached(self, query: str):
"""Cache frequently used queries"""
diff --git a/docs/src/content/docs/quickstart.mdx b/docs/src/content/docs/quickstart.mdx
index e6418b8..7195e42 100644
--- a/docs/src/content/docs/quickstart.mdx
+++ b/docs/src/content/docs/quickstart.mdx
@@ -87,10 +87,8 @@ extractor = LangStruct(example=schema)
# See optimization in action
extractor.optimize(
texts=["training texts..."],
- expected=[{"expected outputs..."}],
- num_trials=50 # More trials = better accuracy
+ expected_results=[{"name": "expected outputs..."}] # Optional - uses confidence if omitted
)
-print(f"Optimized accuracy: {extractor.score:.1%}")
```
## Process Multiple Documents (with quotas)
diff --git a/docs/src/content/docs/why-dspy.mdx b/docs/src/content/docs/why-dspy.mdx
index 338e96d..ffe40cf 100644
--- a/docs/src/content/docs/why-dspy.mdx
+++ b/docs/src/content/docs/why-dspy.mdx
@@ -119,7 +119,7 @@ extractor = LangStruct(example={
# 2. Let MIPROv2 optimize prompts and examples automatically
extractor.optimize(
- training_texts=["Apple reported $125B in Q3...", "Meta earned $40B..."],
+ texts=["Apple reported $125B in Q3...", "Meta earned $40B..."],
expected_results=[
{"company": "Apple", "revenue": 125.0, "quarter": "Q3"},
{"company": "Meta", "revenue": 40.0, "quarter": "Q3"}
@@ -147,17 +147,16 @@ result = extractor.extract("Microsoft announced $65B revenue for Q4")
extractor = LangStruct(
example={"company": "Apple", "revenue": 100.0},
model="gpt-5-mini",
- optimize=True
)
-extractor.optimize(training_texts, expected_results)
+extractor.optimize(texts=training_texts, expected_results=expected_results)
# 6 months later, switch to Claude - just two lines!
extractor.model = "claude-3-7-sonnet-latest"
-extractor.optimize(training_texts, expected_results) # Auto-reoptimizes prompts
+extractor.optimize(texts=training_texts, expected_results=expected_results) # Auto-reoptimizes prompts
# Or use local models for privacy
extractor.model = "ollama/llama3.2"
-extractor.optimize(training_texts, expected_results) # Works the same way
+extractor.optimize(texts=training_texts, expected_results=expected_results) # Works the same way
# Same accuracy, zero prompt rewriting, zero vendor lock-in
```
diff --git a/docs/src/content/docs/why-langstruct.mdx b/docs/src/content/docs/why-langstruct.mdx
index d0c2b96..6a7c687 100644
--- a/docs/src/content/docs/why-langstruct.mdx
+++ b/docs/src/content/docs/why-langstruct.mdx
@@ -155,7 +155,7 @@ extractor = LangExtract(...)
# Month 6: Switch to Claude - everything breaks!
# ❌ Prompts don't work the same way
-# ❌ Few-shot examples need rewriting
+# ❌ Few-shot examples need rewriting
# ❌ Back to manual tuning for weeks
# Month 12: Move to local Llama - start over again!
@@ -166,7 +166,7 @@ extractor = LangExtract(...)
### With LangStruct
```python
# Month 1: Set up once
-extractor = LangStruct(example=schema, optimize=True)
+extractor = LangStruct(example=schema)
extractor.optimize(training_data)
# Month 6: Switch to Claude
diff --git a/examples/06_rag_integration.py b/examples/06_rag_integration.py
index b98a270..99db1c8 100644
--- a/examples/06_rag_integration.py
+++ b/examples/06_rag_integration.py
@@ -69,8 +69,8 @@ def __init__(self, extraction_schema: Dict[str, Any]):
self.metadata_extractor = LangStruct(
example=extraction_schema,
# Model will use LangStruct's default unless specified
- optimize=True, # Enable auto-optimization
)
+ # Call self.metadata_extractor.optimize(...) later with labeled data if needed
except Exception as e:
raise ValueError(
f"Failed to initialize LangStruct: {e}. "
diff --git a/examples/07_optimization.py b/examples/07_optimization.py
index 980c3a1..c3528bf 100644
--- a/examples/07_optimization.py
+++ b/examples/07_optimization.py
@@ -27,8 +27,9 @@ def main():
print("=" * 40)
try:
- # Step 1: Create extractor with optimization enabled
- print("\n1️⃣ Creating extractor with auto-optimization...")
+ # Step 1: Create extractor
+ print("\n1️⃣ Creating extractor...")
extractor = LangStruct(
example={
"person_name": "Dr. Sarah Johnson",
@@ -36,9 +37,8 @@ def main():
"years_experience": 8,
"specialization": "interventional cardiology",
},
- optimize=True, # Enable optimization for better accuracy
)
- print("✅ Extractor created with optimization enabled!")
+ print("✅ Extractor ready! Call optimize() once you have training data.")
# Step 2: Initial extraction (baseline)
print("\n2️⃣ Baseline extraction...")
@@ -143,7 +143,6 @@ def main():
extractor.optimize(
texts=training_texts,
expected_results=expected_results,
- num_trials=10, # More trials → better results (higher cost)
)
did_optimize = True
print(" ✅ Optimization complete!")
diff --git a/langstruct/api.py b/langstruct/api.py
index 2d504de..17d6984 100644
--- a/langstruct/api.py
+++ b/langstruct/api.py
@@ -22,7 +22,6 @@
PersistenceError,
ValidationError,
)
-from .optimizers.bootstrap import BootstrapOptimizer
from .optimizers.metrics import ExtractionMetrics
from .optimizers.mipro import MIPROv2Optimizer
from .parallel import ParallelProcessor, ProcessingResult
@@ -64,7 +63,6 @@ def __init__(
self,
schema: Optional[Type[Schema]] = None,
model: Optional[Union[str, dspy.LM]] = None,
- optimize: bool = False,
optimizer: str = "miprov2",
chunking_config: Optional[ChunkingConfig] = None,
use_sources: bool = True,
@@ -84,8 +82,7 @@ def __init__(
schema: Pydantic schema defining the extraction structure (optional)
model: Model name or DSPy LM instance (defaults to "gpt-5-mini"; pass
"gpt-5-mini"/"gpt-5-pro" for the latest OpenAI models)
- optimize: Whether to use automatic prompt optimization (default: False)
- optimizer: Optimizer to use ("miprov2", "bootstrap")
+ optimizer: Optimizer to use when optimize() runs (default: "miprov2")
chunking_config: Configuration for text chunking
use_sources: Whether to include source grounding (default: True)
example: Single example dict for auto schema generation (optional)
@@ -128,7 +125,6 @@ def __init__(
schema = ensure_schema_class(schema)
self.schema = schema
- self.optimize = optimize
self.optimizer_name = optimizer
self.chunking_config = chunking_config or ChunkingConfig()
self.use_sources = use_sources
@@ -166,7 +162,8 @@ def __init__(
# Initialize the extraction pipeline (robust to monkeypatched constructors)
pipeline_cls = core_modules.ExtractionPipeline
try:
- sig = inspect.signature(pipeline_cls)
+ # Inspect __init__ directly to get actual parameters (not dspy.Module's *args, **kwargs)
+ sig = inspect.signature(pipeline_cls.__init__)
except (TypeError, ValueError):
# Fallback if signature can't be inspected (e.g., C-extensions or mocks)
sig = None
@@ -198,10 +195,8 @@ def __init__(
except TypeError:
self.pipeline = pipeline_cls(schema)
- # Initialize optimizer if requested
+ # Optimizer is created lazily when optimize() is called
self.optimizer = None
- if optimize:
- self._initialize_optimizer()
# Initialize refinement engine if requested
self.refinement_engine = None
@@ -509,7 +504,13 @@ def _extract_single(
else self.refine_config
)
- if effective_refine and self.refinement_engine:
+ if effective_refine:
+ # Lazily initialize refinement engine if not already created
+ if self.refinement_engine is None:
+ self.refinement_engine = RefinementEngine(
+ self.schema, self.pipeline.extractor
+ )
+
# Run refinement process
refined_result, trace = self.refinement_engine(text, effective_refine)
result = refined_result
@@ -580,7 +581,6 @@ def optimize(
self,
texts: List[str],
expected_results: Optional[List[Dict]] = None,
- num_trials: int = 20,
validation_split: float = 0.2,
) -> "LangStruct":
"""Optimize extraction performance on provided data.
@@ -588,7 +588,6 @@ def optimize(
Args:
texts: Training texts for optimization
expected_results: Optional ground truth results for supervised optimization
- num_trials: Number of optimization trials to run
validation_split: Fraction of data to use for validation
Returns:
@@ -619,7 +618,6 @@ def optimize(
val_texts=val_texts or train_texts, # Use train if no val data
train_expected=train_expected,
val_expected=val_expected,
- num_trials=num_trials,
)
self.pipeline = optimized_pipeline
@@ -802,7 +800,7 @@ def save(self, path: str) -> None:
path: Directory path to save the extractor to (will be created if needed)
Example:
- >>> extractor = LangStruct(schema=PersonSchema, optimize=True)
+ >>> extractor = LangStruct(schema=PersonSchema)
>>> extractor.optimize(train_texts, expected_results)
>>> extractor.save("./my_extractor")
>>> # Creates directory with all extractor components
@@ -847,11 +845,9 @@ def _initialize_optimizer(self) -> None:
"""Initialize the appropriate optimizer."""
if self.optimizer_name.lower() == "miprov2":
self.optimizer = MIPROv2Optimizer()
- elif self.optimizer_name.lower() == "bootstrap":
- self.optimizer = BootstrapOptimizer()
else:
raise ValueError(
- f"Unknown optimizer: {self.optimizer_name}. Supported optimizers: miprov2, bootstrap"
+ f"Unknown optimizer: {self.optimizer_name}. Only 'miprov2' is supported."
)
def _parse_refine_config(
@@ -1114,5 +1110,5 @@ def __repr__(self) -> str:
return (
f"LangStruct(schema={self.schema.__name__}, "
f"model={self.lm.__class__.__name__}, "
- f"optimize={self.optimize})"
+ f"optimizer_initialized={self.optimizer is not None})"
)
diff --git a/langstruct/core/persistence.py b/langstruct/core/persistence.py
index 2606829..3d5a623 100644
--- a/langstruct/core/persistence.py
+++ b/langstruct/core/persistence.py
@@ -145,11 +145,10 @@ def load_extractor(cls, path: Union[str, Path]) -> "LangStruct":
extractor = LangStruct(
schema=schema_class,
model=metadata.model_name,
- optimize=False, # We'll handle optimization separately
chunking_config=chunking_config,
use_sources=metadata.use_sources,
**metadata.lm_config,
- )
+ ) # Optimizer state restored separately
except Exception as e:
raise PersistenceError(
f"Failed to recreate LangStruct instance. This may be due to missing API keys, "
@@ -445,12 +444,6 @@ def _restore_optimizer_state(
num_threads=optimizer_state.get("num_threads", 4),
**optimizer_state.get("kwargs", {}),
)
- elif optimizer_name == "bootstrap":
- from ..optimizers.bootstrap import BootstrapOptimizer
-
- extractor.optimizer = BootstrapOptimizer(
- **optimizer_state.get("kwargs", {})
- )
@classmethod
def _restore_refinement_state(
diff --git a/langstruct/core/validation.py b/langstruct/core/validation.py
index b2934d8..42b0a03 100644
--- a/langstruct/core/validation.py
+++ b/langstruct/core/validation.py
@@ -452,7 +452,9 @@ def _generate_suggestions(self, issues: List[ValidationIssue]) -> List[str]:
if IssueType.LOW_CONFIDENCE in issue_types:
suggestions.append("🎯 Try a more powerful model (e.g. gpt-5-mini)")
suggestions.append("📝 Add more detailed field descriptions")
- suggestions.append("🔄 Enable auto-optimization with optimize=True")
+ suggestions.append(
+ "🔄 Run extractor.optimize(...) with representative data"
+ )
if IssueType.MISSING_FIELDS in issue_types:
suggestions.append("❓ Make optional fields Optional[type] in schema")
diff --git a/langstruct/optimizers/__init__.py b/langstruct/optimizers/__init__.py
index a79255f..e4209de 100644
--- a/langstruct/optimizers/__init__.py
+++ b/langstruct/optimizers/__init__.py
@@ -1,7 +1,6 @@
"""Optimization functionality using DSPy optimizers."""
-from .bootstrap import BootstrapOptimizer
from .metrics import ExtractionMetrics
from .mipro import MIPROv2Optimizer
-__all__ = ["MIPROv2Optimizer", "BootstrapOptimizer", "ExtractionMetrics"]
+__all__ = ["MIPROv2Optimizer", "ExtractionMetrics"]
diff --git a/langstruct/optimizers/bootstrap.py b/langstruct/optimizers/bootstrap.py
deleted file mode 100644
index 7599ffc..0000000
--- a/langstruct/optimizers/bootstrap.py
+++ /dev/null
@@ -1,55 +0,0 @@
-"""Bootstrap optimizer for few-shot example generation."""
-
-import logging
-from typing import Any, Dict, List, Optional
-
-from ..core.modules import ExtractionPipeline
-
-logger = logging.getLogger(__name__)
-
-
-class BootstrapOptimizer:
- """DSPy Bootstrap optimizer for generating few-shot examples."""
-
- def __init__(self, max_bootstrapped_demos: int = 8, max_labeled_demos: int = 16):
- """Initialize Bootstrap optimizer.
-
- Args:
- max_bootstrapped_demos: Maximum number of bootstrapped examples
- max_labeled_demos: Maximum number of labeled examples to use
- """
- self.max_bootstrapped_demos = max_bootstrapped_demos
- self.max_labeled_demos = max_labeled_demos
-
- def optimize(
- self,
- pipeline: ExtractionPipeline,
- train_texts: List[str],
- val_texts: List[str],
- train_expected: Optional[List[Dict]] = None,
- val_expected: Optional[List[Dict]] = None,
- num_trials: int = 20,
- ) -> ExtractionPipeline:
- """Optimize extraction pipeline using Bootstrap few-shot learning.
-
- Args:
- pipeline: Extraction pipeline to optimize
- train_texts: Training texts
- val_texts: Validation texts
- train_expected: Expected results for training (optional)
- val_expected: Expected results for validation (optional)
- num_trials: Number of optimization trials
-
- Returns:
- Optimized extraction pipeline
- """
- # TODO: Implement Bootstrap optimization
- # This will use DSPy's BootstrapFewShot to automatically
- # generate good few-shot examples
-
- logger.info("Bootstrap optimization not yet implemented")
- logger.info("Would bootstrap %d examples", self.max_bootstrapped_demos)
- logger.info("From %d training examples", len(train_texts))
-
- # For now, return the original pipeline
- return pipeline
diff --git a/tests/conftest.py b/tests/conftest.py
index 258ff9d..5f2ac28 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -149,7 +149,7 @@ def setup_test_environment():
import dspy
if GOOGLE_API_KEY:
- dspy.configure(lm=dspy.LM("gemini/gemini-2.5-flash"))
+ dspy.configure(lm=dspy.LM("gemini/gemini-2.5-flash-lite"))
print(f"\n✅ Running tests with Gemini 2.5 Flash")
elif OPENAI_API_KEY:
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini"))
diff --git a/tests/test_api.py b/tests/test_api.py
index 9d1e8f9..0b30ff0 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -21,16 +21,14 @@ def test_basic_initialization(self, person_schema, mock_extraction_pipeline):
assert issubclass(extractor.schema, person_schema)
assert extractor.schema is not person_schema
assert extractor.use_sources is True # Default
- assert extractor.optimize is False # Default
+ assert extractor.optimizer is None
def test_initialization_with_options(self, person_schema, mock_extraction_pipeline):
"""Test LangStruct initialization with custom options."""
- extractor = LangStruct(
- schema=person_schema, model="gpt-4o", optimize=True, use_sources=False
- )
+ extractor = LangStruct(schema=person_schema, model="gpt-4o", use_sources=False)
assert issubclass(extractor.schema, person_schema)
- assert extractor.optimize is True
+ assert extractor.optimizer is None
assert extractor.use_sources is False
@integration_test
@@ -155,7 +153,7 @@ def test_constructor_with_schema(self, person_schema, mock_extraction_pipeline):
extractor = LangStruct(schema=person_schema)
assert issubclass(extractor.schema, person_schema)
- assert extractor.optimize is False # Default behavior
+ assert extractor.optimizer is None
assert extractor.use_sources is True # Should be enabled by auto
def test_schema_wrapping_enforces_extra_forbid(self, mock_extraction_pipeline):
@@ -204,7 +202,7 @@ def test_constructor_with_example(
extractor = LangStruct(example=person_example_data)
assert extractor.schema is not None
- assert extractor.optimize is False # Default behavior
+ assert extractor.optimizer is None
assert extractor.use_sources is True
def test_constructor_no_input(self, mock_extraction_pipeline):
@@ -308,12 +306,12 @@ def test_source_grounding_override(
def test_repr(self, person_schema, mock_extraction_pipeline):
"""Test __repr__ method."""
- extractor = LangStruct(schema=person_schema, optimize=True)
+ extractor = LangStruct(schema=person_schema)
repr_str = repr(extractor)
assert "LangStruct" in repr_str
assert "PersonSchema" in repr_str
- assert "optimize=True" in repr_str
+ assert "optimizer_initialized=False" in repr_str
def test_save_load_basic_functionality(
self, person_schema, mock_extraction_pipeline
@@ -342,23 +340,14 @@ def test_save_load_basic_functionality(
assert loaded is not None
assert issubclass(loaded.schema, person_schema)
- def test_optimization_setup(self, person_schema, mock_extraction_pipeline):
- """Test optimizer initialization."""
- # Test with MIPROv2
- extractor1 = LangStruct(
- schema=person_schema, optimize=True, optimizer="miprov2"
- )
- assert extractor1.optimizer is not None
-
- # Test with Bootstrap
- extractor2 = LangStruct(
- schema=person_schema, optimize=True, optimizer="bootstrap"
- )
- assert extractor2.optimizer is not None
+ def test_optimize_raises_for_invalid_optimizer(
+ self, person_schema, mock_extraction_pipeline
+ ):
+ """Ensure invalid optimizer names raise when optimization runs."""
+ extractor = LangStruct(schema=person_schema, optimizer="invalid")
- # Test with invalid optimizer
with pytest.raises(ValueError, match="Unknown optimizer"):
- LangStruct(schema=person_schema, optimize=True, optimizer="invalid")
+ extractor.optimize(["text"])
def test_optimization_default_disabled(
self, person_schema, mock_extraction_pipeline
@@ -367,7 +356,7 @@ def test_optimization_default_disabled(
extractor = LangStruct(schema=person_schema)
# Optimization should be disabled by default now
- assert extractor.optimize is False
+ assert extractor.optimizer is None
def test_evaluate_placeholder(self, person_schema, mock_extraction_pipeline):
"""Test evaluate method (currently placeholder)."""
@@ -523,7 +512,7 @@ def test_auto_configuration_workflow(self, mock_extraction_pipeline):
extractor = LangStruct(example=example)
# Verify default settings
- assert extractor.optimize is False # Default behavior
+ assert extractor.optimizer is None
assert extractor.use_sources is True
# Should work for extraction
diff --git a/tests/test_integration_workflows.py b/tests/test_integration_workflows.py
new file mode 100644
index 0000000..9ca542b
--- /dev/null
+++ b/tests/test_integration_workflows.py
@@ -0,0 +1,171 @@
+"""Slow integration tests that hit real LLM providers when API keys are configured."""
+
+from __future__ import annotations
+
+import json
+from typing import Dict, List, Tuple
+
+import pytest
+
+from langstruct import LangStruct
+from langstruct.core.chunking import ChunkingConfig
+from langstruct.core.refinement import Budget, Refine
+
+
+@pytest.fixture(scope="module")
+def optimization_dataset() -> Tuple[List[str], List[Dict[str, object]]]:
+ """Provide lightweight training data for integration optimization runs."""
+ texts = [
+ """\
+ Alice Johnson is a 29-year-old data scientist based in Seattle, Washington.
+ She leads the analytics team at BlueSky Labs and mentors junior engineers.
+ """.strip(),
+ ]
+
+ labels = [
+ {"name": "Alice Johnson", "age": 29, "location": "Seattle, Washington"},
+ ]
+
+ return texts, labels
+
+
+@pytest.fixture
+def optimized_person_extractor(
+ person_schema,
+ optimization_dataset,
+ requires_api_key,
+):
+ """Create a LangStruct instance that has been optimized against the dataset."""
+ texts, labels = optimization_dataset
+
+ extractor = LangStruct(
+ schema=person_schema,
+ optimizer="miprov2",
+ use_sources=False, # keep requests smaller for integration runs
+ )
+
+ extractor.optimize(
+ texts=texts,
+ expected_results=labels,
+ validation_split=0.0,
+ )
+
+ return {
+ "extractor": extractor,
+ "train_texts": texts,
+ "expected_results": labels,
+ }
+
+
+@pytest.mark.integration
+def test_integration_optimize_smoke(optimized_person_extractor):
+ """End-to-end smoke test covering optimize() and extraction afterwards."""
+ bundle = optimized_person_extractor
+ extractor: LangStruct = bundle["extractor"]
+
+ test_text = (
+ "Dr. Emily Davis is a 38-year-old physician based in Austin, Texas, "
+ "where she leads the cardiology program at Central Health."
+ )
+
+ result = extractor.extract(test_text, validate=False, return_sources=False)
+
+ assert isinstance(result.entities, dict)
+ assert extractor.optimizer is not None
+ assert getattr(extractor.optimizer, "optimizer", None) is not None
+ assert 0.0 <= result.confidence <= 1.0
+ assert any(str(v).strip() for v in result.entities.values())
+ assert result.metadata.get("pipeline") == "langstruct"
+
+
+@pytest.mark.integration
+def test_integration_save_load_after_optimization(optimized_person_extractor, tmp_path):
+ """Ensure optimized extractors persist and reload correctly."""
+ bundle = optimized_person_extractor
+ extractor: LangStruct = bundle["extractor"]
+ texts: List[str] = bundle["train_texts"]
+
+ save_path = tmp_path / "optimized_extractor"
+ extractor.save(str(save_path))
+
+ metadata_path = save_path / "langstruct_metadata.json"
+ with metadata_path.open("r", encoding="utf-8") as fh:
+ metadata = json.load(fh)
+
+ assert metadata["optimization_applied"] is True
+ assert metadata["optimizer_name"] == "miprov2"
+
+ loaded = LangStruct.load(str(save_path))
+ loaded_result = loaded.extract(texts[0], validate=False, return_sources=False)
+
+ assert isinstance(loaded_result.entities, dict)
+ assert loaded.optimizer is not None
+ assert any(str(v).strip() for v in loaded_result.entities.values())
+
+
+@pytest.mark.integration
+def test_integration_chunked_sources(person_schema, requires_api_key):
+ """Validate extraction with source grounding across multiple chunks."""
+ chunk_config = ChunkingConfig(
+ max_tokens=12,
+ overlap_tokens=4,
+ min_chunk_tokens=3,
+ preserve_paragraphs=False,
+ preserve_sentences=False,
+ )
+
+ extractor = LangStruct(schema=person_schema, chunking_config=chunk_config)
+
+ long_text = (
+ "Charlotte Rivera is a 41-year-old neurologist based in San Diego, "
+ "California. She leads the neuroscience unit at Horizon Medical Center. "
+ "Outside of work, Charlotte mentors students at the local university."
+ )
+
+ result = extractor.extract(long_text, validate=False, return_sources=True)
+
+ assert isinstance(result.entities, dict)
+ assert result.sources
+ assert result.metadata.get("total_chunks", 1) > 1
+ assert any(spans for spans in result.sources.values())
+
+
+@pytest.mark.integration
+def test_integration_query_parsing(person_schema, requires_api_key):
+ """Ensure query() returns structured output using the query parser."""
+ extractor = LangStruct(schema=person_schema)
+
+ query = "cardiologists in Seattle over 30"
+ parsed = extractor.query(query, explain=False)
+
+ assert parsed.raw_query == query
+ assert 0.0 <= parsed.confidence <= 1.0
+ assert parsed.metadata.get("parsed_by") == "llm"
+
+
+@pytest.mark.integration
+def test_integration_refinement_flow(person_schema, requires_api_key):
+ """Exercise refinement engine with conservative budget to limit cost."""
+ refine_config = Refine(
+ strategy="bon",
+ n_candidates=1,
+ max_refine_steps=1,
+ temperature=0.3,
+ budget=Budget(max_calls=1),
+ )
+
+ extractor = LangStruct(
+ schema=person_schema,
+ refine=refine_config,
+ use_sources=False,
+ )
+
+ text = (
+ "Dr. Olivia Chen is a 36-year-old cardiologist working at Bayview Medical "
+ "Center in San Francisco, California."
+ )
+
+ result = extractor.extract(text, validate=False, return_sources=False)
+
+ assert result.metadata.get("refinement_applied")
+ assert result.metadata.get("refinement_strategy") == refine_config.strategy
diff --git a/tests/test_persistence.py b/tests/test_persistence.py
index f539110..e9e0e64 100644
--- a/tests/test_persistence.py
+++ b/tests/test_persistence.py
@@ -369,7 +369,8 @@ def test_save_with_refinement_config(self):
def test_metadata_with_optimization_flag(self):
"""Test that optimization flag is correctly saved in metadata."""
- extractor = LangStruct(example={"name": "Alice", "age": 30}, optimize=True)
+ extractor = LangStruct(example={"name": "Alice", "age": 30})
+ extractor.optimizer = object() # simulate optimization having run
with tempfile.TemporaryDirectory() as temp_dir:
save_path = Path(temp_dir) / "test_extractor"