Commit 5a6064b
docs: fix documentation links and add missing sections
Co-Authored-By: jason@jxnl.co <jason@jxnl.co>
devin-ai-integration[bot] and jxnl committed Dec 15, 2024
1 parent 8b65edb commit 5a6064b
Showing 12 changed files with 312 additions and 278 deletions.
12 changes: 12 additions & 0 deletions conftest.py
@@ -0,0 +1,12 @@
import pytest # noqa: F401
from _pytest.config import Config

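# Register provider-specific markers so tests can be selected or skipped,
# e.g. `pytest -m requires_openai` runs only tests decorated with
# @pytest.mark.requires_openai.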
def pytest_configure(config: Config) -> None:
    config.addinivalue_line(
        "markers",
        "requires_openai: mark test as requiring OpenAI API credentials",
    )
    config.addinivalue_line(
        "markers",
        "requires_mistral: mark test as requiring Mistral API credentials",
    )
79 changes: 79 additions & 0 deletions docs/examples/bulk_classification.md
@@ -268,6 +268,85 @@ async def tag_request(request: TagRequest) -> TagResponse:
        predictions=predictions,
    )

## Working with DataFrames

When working with large datasets, it's often convenient to use pandas DataFrames. Here's how you can integrate this classification system with pandas:

```python
import pandas as pd

async def classify_dataframe(df: pd.DataFrame, text_column: str, tags: List[TagWithInstructions]) -> pd.DataFrame:
    request = TagRequest(
        texts=df[text_column].tolist(),
        tags=tags
    )
    response = await tag_request(request)
    df['predicted_tag'] = [pred.name for pred in response.predictions]
    return df
```
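
For instance, assuming a DataFrame with a `text` column and the `tags` list defined earlier in this recipe (the sample rows are illustrative):

```python
import asyncio

df = pd.DataFrame({"text": ["I love this product", "The support was unhelpful"]})
labeled = asyncio.run(classify_dataframe(df, text_column="text", tags=tags))
print(labeled[["text", "predicted_tag"]])
```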

## Streaming Responses

For real-time processing, you can stream responses as they become available:

```python
async def stream_classifications(texts: List[str], tags: List[TagWithInstructions]):
    async def process_single(text: str):
        prediction = await tag_single_request(text, tags)
        return {"text": text, "prediction": prediction}

    tasks = [process_single(text) for text in texts]
    for completed in asyncio.as_completed(tasks):
        yield await completed
```
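
A sketch of how you might consume this generator, assuming `texts` and `tags` from earlier in this recipe and that each prediction is a `Tag` with a `.name` field:

```python
async def main():
    async for result in stream_classifications(texts, tags):
        print(f"{result['text']!r} -> {result['prediction'].name}")

asyncio.run(main())
```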

## Single-Label Classification

For simple classification tasks where each text belongs to exactly one category:

```python
async def classify_single_label(text: str, tags: List[TagWithInstructions]) -> Tag:
    return await tag_single_request(text, tags)
```

## Multi-Label Classification

For cases where texts might belong to multiple categories:

```python
class MultiLabelTag(BaseModel):
    tags: List[Tag]

    @model_validator(mode="after")
    def validate_tags(self, info: ValidationInfo):
        context = info.context
        if context and context.get("tags"):
            valid_tags = context["tags"]
            for tag in self.tags:
                assert tag.id in {t.id for t in valid_tags}, f"Tag ID {tag.id} not found"
                assert tag.name in {t.name for t in valid_tags}, f"Tag name {tag.name} not found"
        return self

async def classify_multi_label(text: str, tags: List[TagWithInstructions]) -> List[Tag]:
    response = await client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a multi-label classification system."},
            {"role": "user", "content": f"Classify this text into multiple categories: {text}"},
            {"role": "user", "content": f"Available categories: {', '.join(t.name for t in tags)}"},
        ],
        response_model=MultiLabelTag,
        validation_context={"tags": tags},
    )
    return response.tags
```
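
Called from an async context, the validator rejects any label the model invents that is not in `tags` (the sample text and labels are illustrative):

```python
labels = await classify_multi_label(
    "The app crashes on login and I was double-charged", tags
)
print([t.name for t in labels])  # e.g. ["bug", "billing"]
```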

## Example Usage

```python
# PLACEHOLDER: existing example code
```


# <%hide%>
tags = [
2 changes: 1 addition & 1 deletion docs/examples/index.md
@@ -37,7 +37,7 @@ Welcome to our collection of cookbooks showcasing the power of structured output
26. [Action Items Extraction](action_items.md): Extract structured action items and tasks from text content.
27. [Batch Classification with LangSmith](batch_classification_langsmith.md): Efficiently classify content in batches using LangSmith integration.
28. [Contact Information Extraction](extract_contact_info.md): Extract structured contact details from unstructured text.
-29. [Knowledge Graph Building](building_knowledge_graph.md): Create and manipulate knowledge graphs from textual data.
+29. [Knowledge Graph Building](building_knowledge_graphs.md): Create and manipulate knowledge graphs from textual data.
30. [Multiple Classification Tasks](multiple_classification.md): Handle multiple classification categories simultaneously.
31. [Pandas DataFrame Integration](pandas_df.md): Work with structured data using Pandas DataFrames.
32. [Partial Response Streaming](partial_streaming.md): Stream partial results for real-time processing.
62 changes: 58 additions & 4 deletions docs/integrations/mistral.md
@@ -2,21 +2,24 @@
draft: False
date: 2024-02-26
title: "Structured outputs with Mistral, a complete guide w/ instructor"
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral."
description: "Complete guide to using Instructor with Mistral. Learn how to generate structured, type-safe outputs with Mistral, including multimodal support with Pixtral."
slug: mistral
tags:
  - patching
  - multimodal
authors:
  - shanktt
---

# Structured outputs with Mistral, a complete guide w/ instructor

-This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses.
+This guide demonstrates how to use Mistral with Instructor to generate structured outputs. You'll learn how to use function calling with Mistral Large to create type-safe responses, including support for multimodal inputs with Pixtral.

-Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and function calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema.
+Mistral Large is the flagship model from Mistral AI, supporting 32k context windows and function calling abilities. Mistral Large's addition of [function calling](https://docs.mistral.ai/guides/function-calling/) makes it possible to obtain structured outputs using JSON schema. With Pixtral, you can now also process images alongside text inputs.

-By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large.
+By the end of this blog post, you will learn how to effectively utilize Instructor with Mistral Large and Pixtral for both text and image processing tasks.

## Text Processing with Mistral Large

```python
import os
# ... (model definition and client setup collapsed in the diff view,
# @@ -47,5 +50,56 @@; the Pixtral example below follows the same pattern)

resp = instructor_client.messages.create(
    ...  # arguments collapsed in the diff view
)

print(resp)
```

## Multimodal Processing with Pixtral

```python
import os
from pydantic import BaseModel
from mistralai import Mistral
from instructor import from_mistral, Mode
from instructor.multimodal import Image

class ImageDescription(BaseModel):
    description: str
    objects: list[str]
    colors: list[str]

# Initialize the client with Pixtral model
client = Mistral(api_key=os.environ.get("MISTRAL_API_KEY"))
instructor_client = from_mistral(
    client=client,
    model="pixtral",  # Use Pixtral for multimodal capabilities
    mode=Mode.MISTRAL_JSON,
    max_tokens=1000,
)

# Load and process an image
image = Image.from_path("path/to/your/image.jpg")
resp = instructor_client.messages.create(
    response_model=ImageDescription,
    messages=[
        {
            "role": "user",
            "content": [
                "Describe this image in detail, including the main objects and colors present.",
                image,
            ],
        }
    ],
    temperature=0,
)

print(resp)
```

## Image Requirements and Validation

When working with images in Pixtral:
- Supported formats: JPEG, PNG, GIF, WEBP
- Maximum image size: 20MB
- Images larger than the size limit will be automatically resized
- Base64 and file paths are supported input formats

The `Image` class handles all validation and preprocessing automatically, ensuring your images meet Mistral's requirements.
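
As a rough sketch of the accepted input styles (`from_path` appears in the guide above; the base64 branch assumes `Image.autodetect` dispatches on path, URL, or base64 string, so verify against your installed instructor version):

```python
import base64
from instructor.multimodal import Image

# From a file path, as in the guide above
img = Image.from_path("path/to/your/image.jpg")

# From a base64 string -- assumes Image.autodetect accepts base64 input;
# check instructor.multimodal in your installed version
with open("path/to/your/image.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")
img_b64 = Image.autodetect(encoded)
```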
86 changes: 55 additions & 31 deletions instructor/__init__.py
@@ -1,9 +1,11 @@
from __future__ import annotations
import importlib.util
from typing import Callable, Union, TypeVar

from .mode import Mode
from .process_response import handle_response_model
from .distil import FinetuneFormat, Instructions
-from .multimodal import Image, Audio
+from .multimodal import Image
from .dsl import (
    CitationMixin,
    Maybe,
@@ -23,10 +25,17 @@
    Provider,
)

-__all__ = [
+T = TypeVar("T")
+
+# Type aliases for client functions
+ClientFunction = Union[
+    Callable[..., Union[Instructor, AsyncInstructor]],
+    None
+]
+
+__all__: list[str] = [
    "Instructor",
    "Image",
-    "Audio",
    "from_openai",
    "from_litellm",
    "AsyncInstructor",
@@ -48,51 +57,66 @@
    "handle_response_model",
]


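# Helper that appends optional client exports to __all__ as their SDKs
# are detected below.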
def _extend_all(new_items: list[str]) -> None:
    global __all__
    __all__ = __all__ + new_items

# Initialize optional client functions with explicit types
from_anthropic: ClientFunction = None
from_gemini: ClientFunction = None
from_fireworks: ClientFunction = None
from_cerebras: ClientFunction = None
from_groq: ClientFunction = None
from_mistral: ClientFunction = None
from_cohere: ClientFunction = None
from_vertexai: ClientFunction = None
from_writer: ClientFunction = None

# Import optional clients
if importlib.util.find_spec("anthropic") is not None:
from .client_anthropic import from_anthropic

__all__ += ["from_anthropic"]
from .client_anthropic import from_anthropic as _from_anthropic
globals()["from_anthropic"] = _from_anthropic
_extend_all(["from_anthropic"])

if (
importlib.util.find_spec("google")
and importlib.util.find_spec("google.generativeai") is not None
):
from .client_gemini import from_gemini

__all__ += ["from_gemini"]
from .client_gemini import from_gemini as _from_gemini
globals()["from_gemini"] = _from_gemini
_extend_all(["from_gemini"])

if importlib.util.find_spec("fireworks") is not None:
from .client_fireworks import from_fireworks

__all__ += ["from_fireworks"]
from .client_fireworks import from_fireworks as _from_fireworks
globals()["from_fireworks"] = _from_fireworks
_extend_all(["from_fireworks"])

if importlib.util.find_spec("cerebras") is not None:
from .client_cerebras import from_cerebras

__all__ += ["from_cerebras"]
from .client_cerebras import from_cerebras as _from_cerebras
globals()["from_cerebras"] = _from_cerebras
_extend_all(["from_cerebras"])

if importlib.util.find_spec("groq") is not None:
from .client_groq import from_groq

__all__ += ["from_groq"]
from .client_groq import from_groq as _from_groq
globals()["from_groq"] = _from_groq
_extend_all(["from_groq"])

if importlib.util.find_spec("mistralai") is not None:
from .client_mistral import from_mistral

__all__ += ["from_mistral"]
from .client_mistral import from_mistral as _from_mistral
globals()["from_mistral"] = _from_mistral
_extend_all(["from_mistral"])

if importlib.util.find_spec("cohere") is not None:
from .client_cohere import from_cohere

__all__ += ["from_cohere"]
from .client_cohere import from_cohere as _from_cohere
globals()["from_cohere"] = _from_cohere
_extend_all(["from_cohere"])

if all(importlib.util.find_spec(pkg) for pkg in ("vertexai", "jsonref")):
from .client_vertexai import from_vertexai

__all__ += ["from_vertexai"]
from .client_vertexai import from_vertexai as _from_vertexai
globals()["from_vertexai"] = _from_vertexai
_extend_all(["from_vertexai"])

if importlib.util.find_spec("writerai") is not None:
from .client_writer import from_writer

__all__ += ["from_writer"]
from .client_writer import from_writer as _from_writer
globals()["from_writer"] = _from_writer
_extend_all(["from_writer"])