SIMOUNIX · SIMOUNIX · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/.env.template b/.env.template
@@ -1,25 +1,3 @@
 # LLM API Keys
 ANTHROPIC_API_KEY=sk-ant-your-key-here
 OPENAI_API_KEY=sk-your-key-here
-
-# Database
-DATABASE_URL=sqlite:///./data/harvestor.db
-
-# Cost Limits
-MAX_COST_PER_DOCUMENT=0.50
-DAILY_COST_LIMIT=100.00
-
-# Models
-DEFAULT_EXTRACTION_MODEL=claude-haiku-4-5-20251001
-DEFAULT_VALIDATION_MODEL=claude-sonnet-4-5-20250929
-
-# OCR Settings
-ENABLE_TESSERACT_PREPROCESSING=true
-OCR_DPI=300
-OCR_LANGUAGES=eng+fra+deu+spa
-
-# Features
-USE_LAYOUT_ANALYSIS=true
-USE_TABLE_EXTRACTION=true
-USE_KEYWORD_PROXIMITY=true
-ENABLE_CACHING=true
diff --git a/example.py b/example.py
@@ -1,16 +1,16 @@
 from typing import Optional
+
 from dotenv import load_dotenv
 from pydantic import BaseModel, Field
 
-from harvestor import Harvestor  # , harvest
-import os
+from harvestor import Harvestor, list_models
 
 load_dotenv()
 
 
-class SimpleInoviceModelSchema(BaseModel):
+class SimpleInvoiceSchema(BaseModel):
     """
-    Implement the schema you want as output. Customise for each document types.
+    Implement the schema you want as output. Customize for each document type.
     """
 
     vendor: Optional[str] = Field(None, description="The vendor name")
@@ -20,16 +20,22 @@ class SimpleInoviceModelSchema(BaseModel):
     customer_lastname: Optional[str] = Field(None, description="The customer lastname")
 
 
-ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
+# List available models
+print("Available models:", list(list_models().keys()))
 
-h = Harvestor(api_key=ANTHROPIC_API_KEY, model="Claude Haiku 3")
+# Use default model (claude-haiku)
+h = Harvestor(model="claude-haiku")
 
 output = h.harvest_file(
-    source="data/uploads/keep_for_test.jpg", schema=SimpleInoviceModelSchema
+    source="data/uploads/keep_for_test.jpg", schema=SimpleInvoiceSchema
 )
 
 print(output.to_summary())
 
-# output_2 = harvest("data/uploads/keep_for_test.jpg", schema=SimpleInoviceModelSchema)
+# Alternative: use OpenAI
+# h_openai = Harvestor(model="gpt-4o-mini")
+# output = h_openai.harvest_file("invoice.jpg", schema=SimpleInvoiceSchema)
 
-# print(output_2.to_summary())
+# Alternative: use local Ollama (free)
+# h_ollama = Harvestor(model="llava")
+# output = h_ollama.harvest_file("invoice.jpg", schema=SimpleInvoiceSchema)
diff --git a/pyproject.toml b/pyproject.toml
@@ -26,6 +26,7 @@ dependencies = [
     "langchain-openai>=0.0.5",
     "anthropic>=0.18.0",
     "openai>=1.10.0",
+    "httpx>=0.27.0",  # For Ollama provider
 
     # Document Processing
     "PyMuPDF>=1.23.0",

diff --git a/pytest.ini b/pytest.ini
@@ -24,9 +24,5 @@ markers =
     slow: Slow running tests
     vision: Tests that use vision API
 
-# Coverage options (if using pytest-cov)
-# Uncomment when pytest-cov is installed
-# addopts = --cov=src/harvestor --cov-report=term-missing --cov-report=html
-
 # Minimum Python version
 minversion = 3.10
diff --git a/src/harvestor/__init__.py b/src/harvestor/__init__.py
@@ -2,6 +2,7 @@
 Harvestor - Harvest intelligence from any document
 
 Extract structured data from any document with AI-powered extraction.
+Supports multiple LLM providers: Anthropic, OpenAI, and Ollama.
 """
 
 __version__ = "0.1.0"
@@ -14,6 +15,20 @@
 from .config import SUPPORTED_MODELS
 from .core.cost_tracker import cost_tracker
 from .core.harvestor import Harvestor, harvest
+from .providers import (
+    DEFAULT_MODEL,
+    MODELS,
+    PROVIDERS,
+    AnthropicProvider,
+    BaseLLMProvider,
+    CompletionResult,
+    ModelInfo,
+    OllamaProvider,
+    OpenAIProvider,
+    get_provider,
+    list_models,
+    list_providers,
+)
 from .schemas.base import (
     ExtractionResult,
     ExtractionStrategy,
@@ -39,4 +54,17 @@
     "LineItem",
     # Config
     "SUPPORTED_MODELS",
+    "MODELS",
+    "DEFAULT_MODEL",
+    # Providers
+    "PROVIDERS",
+    "BaseLLMProvider",
+    "CompletionResult",
+    "ModelInfo",
+    "AnthropicProvider",
+    "OpenAIProvider",
+    "OllamaProvider",
+    "get_provider",
+    "list_models",
+    "list_providers",
 ]
diff --git a/src/harvestor/cli/main.py b/src/harvestor/cli/main.py
@@ -7,7 +7,7 @@
 import sys
 from pathlib import Path
 
-from harvestor import harvest
+from harvestor import DEFAULT_MODEL, harvest, list_models
 from harvestor.schemas.defaults import InvoiceData, ReceiptData
 
 
@@ -20,17 +20,19 @@ def build_parser():
     parser.add_argument(
         "file_path",
         type=Path,
+        nargs="?",
         help="Path to the document to process",
     )
     parser.add_argument(
         "schema",
+        nargs="?",
         help="Schema to use (e.g., InvoiceData, ReceiptData)",
     )
     parser.add_argument(
         "-m",
         "--model",
-        default="Claude Haiku 3",
-        help="Model to use (default: Claude Haiku 3)",
+        default=DEFAULT_MODEL,
+        help=f"Model to use (default: {DEFAULT_MODEL})",
     )
     parser.add_argument(
         "-o",
@@ -43,13 +45,22 @@ def build_parser():
         action="store_true",
         help="Pretty print JSON output",
     )
+    parser.add_argument(
+        "--list-models",
+        action="store_true",
+        help="List available models and exit",
+    )
+    parser.add_argument(
+        "--list-schemas",
+        action="store_true",
+        help="List available schemas and exit",
+    )
 
     return parser
 
 
 def get_schema(schema_name: str):
     """Resolve schema name to actual schema class."""
-
     schemas = {
         "InvoiceData": InvoiceData,
         "ReceiptData": ReceiptData,
@@ -62,10 +73,70 @@ def get_schema(schema_name: str):
     return schemas[schema_name]
 
 
+def print_models():
+    """Show every registered model, one section per provider.
+
+    Each row gives the model name, its input price per million tokens
+    (or "free" when that price is 0) and a "(vision)" tag when supported.
+    """
+    # Bucket (name, info) pairs by provider ("unknown" when the key is absent).
+    grouped = {}
+    for model_name, details in list_models().items():
+        key = details.get("provider", "unknown")
+        grouped.setdefault(key, []).append((model_name, details))
+
+    print("\nAvailable models:")
+    print("=" * 50)
+
+    for key, entries in sorted(grouped.items()):
+        print(f"\n{key.upper()}:")
+        for model_name, details in sorted(entries):
+            price = details.get("input_cost", 0)
+            # A zero input price is reported as "free" instead of "$0".
+            label = "free" if price == 0 else f"${price}/M tokens"
+            if details.get("supports_vision"):
+                label += " (vision)"
+            print(f"  {model_name:<20} {label}")
+
+    print(f"\nDefault: {DEFAULT_MODEL}")
+    print()
+
+
+def print_schemas():
+    """Print the name and first docstring line of each known schema."""
+    available = {"InvoiceData": InvoiceData, "ReceiptData": ReceiptData}
+
+    print("\nAvailable schemas:")
+    print("=" * 50)
+
+    for schema_name, schema_cls in available.items():
+        # Schemas without a docstring get a placeholder description.
+        text = (schema_cls.__doc__ or "No description").strip()
+        # Only the text up to the first newline is shown.
+        summary, _, _ = text.partition("\n")
+        print(f"  {schema_name}: {summary}")
+
+    print()
+
+
 def main():
     parser = build_parser()
     args = parser.parse_args()
 
+    if args.list_models:
+        print_models()
+        sys.exit(0)
+
+    if args.list_schemas:
+        print_schemas()
+        sys.exit(0)
+
+    if not args.file_path:
+        parser.error("file_path is required")
+
+    if not args.schema:
+        parser.error("schema is required")
+
     if not args.file_path.exists():
         print(f"Error: File not found: {args.file_path}", file=sys.stderr)
         sys.exit(1)

diff --git a/src/harvestor/config.py b/src/harvestor/config.py
@@ -1,28 +1,19 @@
-SUPPORTED_MODELS = {
-    # Anthropic Claude
-    "Claude Haiku 3": {"id": "claude-3-haiku-20240307", "input": 0.25, "output": 1.25},
-    "Claude Haiku 4.5": {
-        "id": "claude-haiku-4-5-20251001",
-        "input": 1.0,
-        "output": 5.0,
-    },
-    "Claude Sonnet 3.7": {
-        "id": "claude-3-7-sonnet-20250219",
-        "input": 3.0,
-        "output": 15.0,
-    },
-    "Claude Sonnet 4.5": {
-        "id": "claude-sonnet-4-5-20250929",
-        "input": 3.0,
-        "output": 15.0,
-    },
-    "Claude Opus 4,5": {
-        "id": "claude-opus-4-5-20251101",
-        "input": 5.0,
-        "output": 25.0,
-    },  # very good stuff
-    # OpenAI TODO: check OpenAI models
-    # "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
-    "gpt-4": {"input": 30.0, "output": 60.0},
-    # "gpt-4-turbo": {"input": 10.0, "output": 30.0},
-}
+"""
+Configuration for Harvestor.
+
+Model definitions are now managed in the providers module.
+This file re-exports them for backwards compatibility.
+"""
+
+from .providers import DEFAULT_MODEL, MODELS, list_models, list_providers
+
+# Backwards-compatible name only: entries are read via "input_cost"/"output_cost" keys now.
+SUPPORTED_MODELS = MODELS
+
+__all__ = [
+    "MODELS",
+    "SUPPORTED_MODELS",
+    "DEFAULT_MODEL",
+    "list_models",
+    "list_providers",
+]
diff --git a/src/harvestor/core/cost_tracker.py b/src/harvestor/core/cost_tracker.py
@@ -12,7 +12,6 @@
 from typing import Dict, List, Optional
 
 from ..schemas.base import CostReport, ExtractionStrategy
-from ..config import SUPPORTED_MODELS
 
 
 @dataclass
@@ -107,14 +106,15 @@ def calculate_cost(
         self, model: str, input_tokens: int, output_tokens: int
     ) -> float:
         """Calculate cost for a given API call."""
-        if model not in SUPPORTED_MODELS:
-            # Unknown model, use conservative estimate (GPT-4 pricing)
-            raise ModelNotSupported(f"Model {model} is not supported.")
-        else:
-            pricing = SUPPORTED_MODELS[model]
+        from ..providers import MODELS
 
-        input_cost = (input_tokens / 1_000_000) * pricing["input"]
-        output_cost = (output_tokens / 1_000_000) * pricing["output"]
+        if model not in MODELS:
+            # Unknown model (possibly Ollama custom), assume free
+            return 0.0
+
+        pricing = MODELS[model]
+        input_cost = (input_tokens / 1_000_000) * pricing["input_cost"]
+        output_cost = (output_tokens / 1_000_000) * pricing["output_cost"]
 
         return input_cost + output_cost
 
@@ -263,19 +263,12 @@ def generate_report(self, days: int = 7) -> CostReport:
 
         # Calculate costs
         total_cost = sum(c.cost for c in recent_calls)
-        free_successes = 0
-        llm_calls = len(
-            [
-                c
-                for c in recent_calls
-                if c.strategy
-                in {
-                    ExtractionStrategy.LLM_HAIKU,
-                    ExtractionStrategy.LLM_SONNET,
-                    ExtractionStrategy.LLM_GPT35,
-                }
-            ]
+        free_successes = sum(
+            1
+            for c in recent_calls
+            if c.strategy == ExtractionStrategy.LLM_OLLAMA and c.success
         )
+        llm_calls = len(recent_calls)
 
         # Cost by strategy
         cost_by_strategy: Dict[str, float] = {}