Commit 5a6064b
docs: fix documentation links and add missing sections
Co-Authored-By: jason@jxnl.co <jason@jxnl.co>
devin-ai-integration[bot] and jxnl committed Dec 15, 2024
1 parent 8b65edb commit 5a6064b
Showing 12 changed files with 312 additions and 278 deletions.
12 changes: 12 additions & 0 deletions conftest.py
@@ -0,0 +1,12 @@
import pytest # noqa: F401
from _pytest.config import Config

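# Register provider-specific markers so tests can be selected or skipped,
# e.g. `pytest -m requires_openai` runs only tests decorated with
# @pytest.mark.requires_openai.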
def pytest_configure(config: Config) -> None:
    config.addinivalue_line(
        "markers",
        "requires_openai: mark test as requiring OpenAI API credentials",
    )
    config.addinivalue_line(
        "markers",
        "requires_mistral: mark test as requiring Mistral API credentials",
    )
79 changes: 79 additions & 0 deletions docs/examples/bulk_classification.md
@@ -268,6 +268,85 @@ async def tag_request(request: TagRequest) -> TagResponse:
        predictions=predictions,
    )

## Working with DataFrames

When working with large datasets, it's often convenient to use pandas DataFrames. Here's how you can integrate this classification system with pandas:

```python
import pandas as pd

async def classify_dataframe(df: pd.DataFrame, text_column: str, tags: List[TagWithInstructions]) -> pd.DataFrame:
    request = TagRequest(
        texts=df[text_column].tolist(),
        tags=tags
    )
    response = await tag_request(request)
    df['predicted_tag'] = [pred.name for pred in response.predictions]
    return df
```
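
For instance, assuming a DataFrame with a `text` column and the `tags` list defined earlier in this recipe (the sample rows are illustrative):

```python
import asyncio

df = pd.DataFrame({"text": ["I love this product", "The support was unhelpful"]})
labeled = asyncio.run(classify_dataframe(df, text_column="text", tags=tags))
print(labeled[["text", "predicted_tag"]])
```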

## Streaming Responses

For real-time processing, you can stream responses as they become available:

```python
async def stream_classifications(texts: List[str], tags: List[TagWithInstructions]):
    async def process_single(text: str):
        prediction = await tag_single_request(text, tags)
        return {"text": text, "prediction": prediction}

    tasks = [process_single(text) for text in texts]
    for completed in asyncio.as_completed(tasks):
        yield await completed
```
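
A sketch of how you might consume this generator, assuming `texts` and `tags` from earlier in this recipe and that each prediction is a `Tag` with a `.name` field:

```python
async def main():
    async for result in stream_classifications(texts, tags):
        print(f"{result['text']!r} -> {result['prediction'].name}")

asyncio.run(main())
```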

## Single-Label Classification

For simple classification tasks where each text belongs to exactly one category:

```python
async def classify_single_label(text: str, tags: List[TagWithInstructions]) -> Tag:
    return await tag_single_request(text, tags)
```

## Multi-Label Classification

For cases where texts might belong to multiple categories:

```python
class MultiLabelTag(BaseModel):
    tags: List[Tag]

    @model_validator(mode="after")
    def validate_tags(self, info: ValidationInfo):
        context = info.context
        if context and context.get("tags"):
            valid_tags = context["tags"]
            for tag in self.tags:
                assert tag.id in {t.id for t in valid_tags}, f"Tag ID {tag.id} not found"
                assert tag.name in {t.name for t in valid_tags}, f"Tag name {tag.name} not found"
        return self

async def classify_multi_label(text: str, tags: List[TagWithInstructions]) -> List[Tag]:
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a multi-label classification system."},
            {"role": "user", "content": f"Classify this text into multiple categories: {text}"},
            {"role": "user", "content": f"Available categories: {', '.join(t.name for t in tags)}"},
        ],
        response_model=MultiLabelTag,
        validation_context={"tags": tags},
    )
    return response.tags
```
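
Called from an async context, the validator rejects any label the model invents that is not in `tags` (the sample text and labels are illustrative):

```python
labels = await classify_multi_label(
    "The app crashes on login and I was double-charged", tags
)
print([t.name for t in labels])  # e.g. ["bug", "billing"]
```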

## Example Usage

```python
# PLACEHOLDER: existing example code
```


# <%hide%>
tags = [
2 changes: 1 addition & 1 deletion docs/examples/index.md
@@ -37,7 +37,7 @@ Welcome to our collection of cookbooks showcasing the power of structured output
26. [Action Items Extraction](action_items.md): Extract structured action items and tasks from text content.
27. [Batch Classification with LangSmith](batch_classification_langsmith.md): Efficiently classify content in batches using LangSmith integration.
28. [Contact Information Extraction](extract_contact_info.md): Extract structured contact details from unstructured text.
-29. [Knowledge Graph Building](building_knowledge_graph.md): Create and manipulate knowledge graphs from textual data.
+29. [Knowledge Graph Building](building_knowledge_graphs.md): Create and manipulate knowledge graphs from textual data.
30. [Multiple Classification Tasks](multiple_classification.md): Handle multiple classification categories simultaneously.
31. [Pandas DataFrame Integration](pandas_df.md): Work with structured data using Pandas DataFrames.
32. [Partial Response Streaming](partial_streaming.md): Stream partial results for real-time processing.
62 changes: 58 additions & 4 deletions docs/integrations/mistral.md
@@ -2,21 +2,24 @@
draft: False
date: 2024-02-26
title: "Structured outputs with Mistral, a complete guide w/ instructor"
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral."
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral, including multimodal support with Pixtral."
slug: mistral
tags:
  - patching
  - multimodal
authors:
  - shanktt
---

# Structured outputs with Mistral, a complete guide w/ instructor

-This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses.
+This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses, including support for multimodal inputs with Pixtral.

-Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and function calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema.
+Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and function calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema. With Pixtral, you can now also process images alongside text inputs.

-By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large.
+By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large and Pixtral for both text and image processing tasks.

## Text Processing with Mistral Large

```python
import os
# ... (model definition and client setup collapsed in the diff view,
# @@ -47,5 +50,56 @@; the Pixtral example below follows the same pattern)

resp = instructor_client.messages.create(
    ...  # arguments collapsed in the diff view
)

print(resp)
```

## Multimodal Processing with Pixtral

```python
import os
from pydantic import BaseModel
from mistralai import Mistral
from instructor import from_mistral, Mode
from instructor.multimodal import Image

class ImageDescription(BaseModel):
    description: str
    objects: list[str]
    colors: list[str]

# Initialize the client with Pixtral model
client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
instructor_client = from_mistral(
    client=client,
    model="pixtral",  # Use Pixtral for multimodal capabilities
    mode=Mode.MISTRAL_JSON,
    max_tokens=1000,
)

# Load and process an image
image = Image.from_path("path/to/your/image.jpg")
resp = instructor_client.messages.create(
    response_model=ImageDescription,
    messages=[
        {
            "role": "user",
            "content": [
                "Describe this image in detail, including the main objects and colors present.",
                image,
            ],
        }
    ],
    temperature=0,
)

print(resp)
```

## Image Requirements and Validation

When working with images in Pixtral:
- Supported formats: JPEG, PNG, GIF, WEBP
- Maximum image size: 20MB
- Images larger than the size limit will be automatically resized
- Base64 and file paths are supported input formats

The `Image` class handles all validation and preprocessing automatically, ensuring your images meet Mistral's requirements.
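
As a rough sketch of the accepted input styles (`from_path` appears in the guide above; the base64 branch assumes `Image.autodetect` dispatches on path, URL, or base64 string, so verify against your installed instructor version):

```python
import base64
from instructor.multimodal import Image

# From a file path, as in the guide above
img = Image.from_path("path/to/your/image.jpg")

# From a base64 string -- assumes Image.autodetect accepts base64 input;
# check instructor.multimodal in your installed version
with open("path/to/your/image.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")
img_b64 = Image.autodetect(encoded)
```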
86 changes: 55 additions & 31 deletions instructor/__init__.py
@@ -1,9 +1,11 @@
from __future__ import annotations
import importlib.util
from typing import Callable, Union, TypeVar

from .mode import Mode
from .process_response import handle_response_model
from .distil import FinetuneFormat, Instructions
-from .multimodal import Image, Audio
+from .multimodal import Image
from .dsl import (
    CitationMixin,
    Maybe,
@@ -23,10 +25,17 @@
    Provider,
)

-__all__ = [
+T = TypeVar("T")
+
+# Type aliases for client functions
+ClientFunction = Union[
+    Callable[..., Union[Instructor, AsyncInstructor]],
+    None
+]
+
+__all__: list[str] = [
    "Instructor",
    "Image",
-    "Audio",
    "from_openai",
    "from_litellm",
    "AsyncInstructor",
@@ -48,51 +57,66 @@
    "handle_response_model",
]


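# Helper that appends optional client exports to __all__ as their SDKs
# are detected below.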
def _extend_all(new_items: list[str]) -> None:
    global __all__
    __all__ = __all__ + new_items

# Initialize optional client functions with explicit types
from_anthropic: ClientFunction = None
from_gemini: ClientFunction = None
from_fireworks: ClientFunction = None
from_cerebras: ClientFunction = None
from_groq: ClientFunction = None
from_mistral: ClientFunction = None
from_cohere: ClientFunction = None
from_vertexai: ClientFunction = None
from_writer: ClientFunction = None

# Import optional clients
if importlib.util.find_spec("anthropic") is not None:
from .client_anthropic import from_anthropic

__all__ += ["from_anthropic"]
from .client_anthropic import from_anthropic as _from_anthropic
globals()["from_anthropic"] = _from_anthropic
_extend_all(["from_anthropic"])

if (
importlib.util.find_spec("google")
and importlib.util.find_spec("google.generativeai") is not None
):
from .client_gemini import from_gemini

__all__ += ["from_gemini"]
from .client_gemini import from_gemini as _from_gemini
globals()["from_gemini"] = _from_gemini
_extend_all(["from_gemini"])

if importlib.util.find_spec("fireworks") is not None:
from .client_fireworks import from_fireworks

__all__ += ["from_fireworks"]
from .client_fireworks import from_fireworks as _from_fireworks
globals()["from_fireworks"] = _from_fireworks
_extend_all(["from_fireworks"])

if importlib.util.find_spec("cerebras") is not None:
from .client_cerebras import from_cerebras

__all__ += ["from_cerebras"]
from .client_cerebras import from_cerebras as _from_cerebras
globals()["from_cerebras"] = _from_cerebras
_extend_all(["from_cerebras"])

if importlib.util.find_spec("groq") is not None:
from .client_groq import from_groq

__all__ += ["from_groq"]
from .client_groq import from_groq as _from_groq
globals()["from_groq"] = _from_groq
_extend_all(["from_groq"])

if importlib.util.find_spec("mistralai") is not None:
from .client_mistral import from_mistral

__all__ += ["from_mistral"]
from .client_mistral import from_mistral as _from_mistral
globals()["from_mistral"] = _from_mistral
_extend_all(["from_mistral"])

if importlib.util.find_spec("cohere") is not None:
from .client_cohere import from_cohere

__all__ += ["from_cohere"]
from .client_cohere import from_cohere as _from_cohere
globals()["from_cohere"] = _from_cohere
_extend_all(["from_cohere"])

if all(importlib.util.find_spec(pkg) for pkg in ("vertexai", "jsonref")):
from .client_vertexai import from_vertexai

__all__ += ["from_vertexai"]
from .client_vertexai import from_vertexai as _from_vertexai
globals()["from_vertexai"] = _from_vertexai
_extend_all(["from_vertexai"])

if importlib.util.find_spec("writerai") is not None:
from .client_writer import from_writer

__all__ += ["from_writer"]
from .client_writer import from_writer as _from_writer
globals()["from_writer"] = _from_writer
_extend_all(["from_writer"])