Migrating Anthropic (#1281)
ivanleomk authored Dec 26, 2024
1 parent f484c9f commit e476bc5
Showing 9 changed files with 95 additions and 67 deletions.
62 changes: 35 additions & 27 deletions docs/concepts/prompt_caching.md
@@ -17,23 +17,16 @@ This optimization is especially useful for applications making multiple API call

Prompt Caching is enabled for the following models:

* gpt-4o
* gpt-4o-mini
* o1-preview
* o1-mini
- gpt-4o
- gpt-4o-mini
- o1-preview
- o1-mini

Caching is based on prefix matching, so if you're using a system prompt that contains a common set of instructions, you're likely to see a cache hit as long as you move all variable parts of the prompt to the end of the message when possible.
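
For illustration, here is a minimal sketch (not part of this commit; the names, prompt text, and model choice are placeholders) of structuring requests so repeated calls share a cacheable prefix, and of checking whether that prefix was reused:

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel

client = instructor.from_openai(OpenAI())


class Summary(BaseModel):
    summary: str


# Static, shared instructions go first so every request starts with the same
# prefix. OpenAI only caches sufficiently long prompts, so in practice this
# shared prefix needs to be on the order of 1024+ tokens.
shared_instructions = "You are a careful analyst. <long, unchanging instructions>"


def summarize(question: str) -> Summary:
    resp, completion = client.chat.completions.create_with_completion(
        model="gpt-4o-mini",
        response_model=Summary,
        messages=[
            {"role": "system", "content": shared_instructions},  # cacheable prefix
            {"role": "user", "content": question},  # variable part goes last
        ],
    )
    # On a cache hit the reused prefix is reported here (the field can be
    # None or 0 when nothing was cached).
    if completion.usage and completion.usage.prompt_tokens_details:
        print(completion.usage.prompt_tokens_details.cached_tokens)
    return resp
```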


## Prompt Caching in Anthropic

The `anthropic.beta.prompt_caching.messages.create` method enables you to:

1. Cache specific prompt portions
2. Reuse cached content in subsequent calls
3. Reduce processed data per request

By implementing prompt caching, you can potentially enhance efficiency and reduce costs, especially when dealing with large, shared contexts across multiple API interactions.
Prompt Caching is now generally available for Anthropic. This enables you to cache specific portions of a prompt, reuse cached content in subsequent calls, and reduce the amount of data processed per request.

??? note "Source Text"

@@ -182,18 +175,11 @@ By implementing prompt caching, you can potentially enhance efficiency and reduc
```

```python
from instructor import Instructor, Mode, patch
import instructor
from anthropic import Anthropic
from pydantic import BaseModel

client = Instructor( # (1)!
client=Anthropic(),
create=patch(
create=Anthropic().beta.prompt_caching.messages.create,
mode=Mode.ANTHROPIC_TOOLS,
),
mode=Mode.ANTHROPIC_TOOLS,
)
client = instructor.from_anthropic(Anthropic())


class Character(BaseModel):
@@ -204,16 +190,16 @@ class Character(BaseModel):
with open("./book.txt") as f:
book = f.read()

resp = client.chat.completions.create(
model="claude-3-haiku-20240307",
resp, completion = client.chat.completions.create_with_completion(
model="claude-3-5-sonnet-20240620",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "<book>" + book + "</book>",
"cache_control": {"type": "ephemeral"}, # (2)!
"cache_control": {"type": "ephemeral"}, #(1)!
},
{
"type": "text",
@@ -225,11 +211,33 @@ resp = client.chat.completions.create(
response_model=Character,
max_tokens=1000,
)
```

1. Since the feature is still in beta, we need to manually pass in the function that we're looking to patch.
print(completion)
# Message(
# id='msg_01QcqjktYc1PXL8nk7y5hkMV',
# content=[
# ToolUseBlock(
# id='toolu_019wABRzQxtSbXeuuRwvJo15',
# input={
# 'name': 'Jane Austen',
# 'description': 'A renowned English novelist of the early 19th century, known for her wit, humor, and keen observations of human nature. She is the author of
# several classic novels including "Pride and Prejudice," "Emma," "Sense and Sensibility," and "Mansfield Park." Austen\'s writing is characterized by its subtlety, delicate touch,
# and ability to create memorable characters. Her work often involves social commentary and explores themes of love, marriage, and societal expectations in Regency-era England.'
# },
# name='Character',
# type='tool_use'
# )
# ],
# model='claude-3-5-sonnet-20240620',
# role='assistant',
# stop_reason='tool_use',
# stop_sequence=None,
# type='message',
# usage=Usage(cache_creation_input_tokens=2777, cache_read_input_tokens=0, input_tokens=30, output_tokens=161)
# )
```

2. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
1. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
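
To confirm the cache is actually being reused, a sketch like the following (continuing the example above; the follow-up question text and the exact token counts are illustrative) issues the same request twice and compares the usage blocks:

```python
cached_messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "<book>" + book + "</book>",
                "cache_control": {"type": "ephemeral"},
            },
            {
                "type": "text",
                "text": "Extract a character from the text given above",
            },
        ],
    }
]

# The first call writes the large prefix to the cache...
_, first = client.chat.completions.create_with_completion(
    model="claude-3-5-sonnet-20240620",
    messages=cached_messages,
    response_model=Character,
    max_tokens=1000,
)

# ...and a second call with an identical prefix should read from it while
# the cache entry is still warm (entries expire after roughly five minutes).
_, second = client.chat.completions.create_with_completion(
    model="claude-3-5-sonnet-20240620",
    messages=cached_messages,
    response_model=Character,
    max_tokens=1000,
)

print(first.usage.cache_creation_input_tokens, first.usage.cache_read_input_tokens)
print(second.usage.cache_creation_input_tokens, second.usage.cache_read_input_tokens)
# Roughly (N, 0) on the first call and (0, N) on the second.
```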

!!! Warning "Caching Considerations"

10 changes: 1 addition & 9 deletions instructor/client_anthropic.py
@@ -42,7 +42,6 @@ def from_anthropic(
| anthropic.AnthropicVertex
),
mode: instructor.Mode = instructor.Mode.ANTHROPIC_TOOLS,
enable_prompt_caching: bool = False,
beta: bool = False,
**kwargs: Any,
) -> instructor.Instructor | instructor.AsyncInstructor:
@@ -82,14 +81,7 @@
),
), "Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic, anthropic.AnthropicBedrock, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicVertex, anthropic.AsyncAnthropicVertex}"

if enable_prompt_caching:
if isinstance(client, (anthropic.Anthropic, anthropic.AsyncAnthropic)):
create = client.beta.prompt_caching.messages.create
else:
raise TypeError(
"Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic} to enable prompt caching"
)
elif beta:
if beta:
create = client.beta.messages.create
else:
create = client.messages.create
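For downstream callers, the migration this implies is sketched below (illustrative, not part of the diff): the client-level flag is gone, and caching is requested per message instead.

```python
import anthropic
import instructor

# Before this commit (no longer supported):
# client = instructor.from_anthropic(anthropic.Anthropic(), enable_prompt_caching=True)

# After: construct the client normally; caching is opted into per message via
# "cache_control" blocks, and beta=True routes to the beta Messages API when
# needed.
client = instructor.from_anthropic(anthropic.Anthropic())
```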
6 changes: 2 additions & 4 deletions instructor/reask.py
@@ -23,9 +23,8 @@ def reask_anthropic_tools(
):
kwargs = kwargs.copy()
from anthropic.types import Message
from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage

assert isinstance(response, Message) or isinstance(response, PromptCachingBetaMessage), "Response must be a Anthropic Message"
assert isinstance(response, Message), "Response must be a Anthropic Message"

assistant_content = []
tool_use_id = None
@@ -71,9 +70,8 @@ def reask_anthropic_json(
):
kwargs = kwargs.copy()
from anthropic.types import Message
from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage

assert isinstance(response, Message) or isinstance(response, PromptCachingBetaMessage), "Response must be a Anthropic Message"
assert isinstance(response, Message), "Response must be a Anthropic Message"

reask_msg = {
"role": "user",
24 changes: 19 additions & 5 deletions instructor/retry.py
@@ -14,7 +14,11 @@
from instructor.utils import update_total_usage
from instructor.validators import AsyncValidationError
from openai.types.chat import ChatCompletion
from openai.types.completion_usage import CompletionUsage, CompletionTokensDetails, PromptTokensDetails
from openai.types.completion_usage import (
CompletionUsage,
CompletionTokensDetails,
PromptTokensDetails,
)
from pydantic import BaseModel, ValidationError
from tenacity import (
AsyncRetrying,
@@ -71,14 +75,24 @@ def initialize_usage(mode: Mode) -> CompletionUsage | Any:
Returns:
CompletionUsage | Any: Initialized usage object.
"""
total_usage = CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0,
completion_tokens_details = CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0),
prompt_tokens_details = PromptTokensDetails(audio_tokens=0, cached_tokens=0)
total_usage = CompletionUsage(
completion_tokens=0,
prompt_tokens=0,
total_tokens=0,
completion_tokens_details=CompletionTokensDetails(
audio_tokens=0, reasoning_tokens=0
),
prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0),
)
if mode in {Mode.ANTHROPIC_TOOLS, Mode.ANTHROPIC_JSON}:
from anthropic.types import Usage as AnthropicUsage

total_usage = AnthropicUsage(input_tokens=0, output_tokens=0)
total_usage = AnthropicUsage(
input_tokens=0,
output_tokens=0,
cache_read_input_tokens=0,
cache_creation_input_tokens=0,
)
return total_usage


6 changes: 6 additions & 0 deletions instructor/utils.py
@@ -169,6 +169,12 @@ def update_total_usage(
):
total_usage.input_tokens += response_usage.input_tokens or 0
total_usage.output_tokens += response_usage.output_tokens or 0
total_usage.cache_creation_input_tokens += (
response_usage.cache_creation_input_tokens or 0
)
total_usage.cache_read_input_tokens += (
response_usage.cache_read_input_tokens or 0
)
response.usage = total_usage
return response
except ImportError:
7 changes: 4 additions & 3 deletions pyproject.toml
@@ -56,7 +56,7 @@ test-docs = [
"mistralai<2.0.0,>=1.0.3",
]
anthropic = [
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
"xmltodict<0.15,>=0.13",
]
groq = [
@@ -106,9 +106,10 @@ docs = [
"mkdocs-rss-plugin<2.0.0,>=1.12.0",
"mkdocs-minify-plugin<1.0.0,>=0.8.0",
"mkdocs-redirects<2.0.0,>=1.2.1",
"material>=0.1",
]
anthropic = [
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
]
test-docs = [
"fastapi<0.116.0,>=0.109.2",
@@ -118,7 +119,7 @@ test-docs = [
"tabulate<1.0.0,>=0.9.0",
"pydantic-extra-types<3.0.0,>=2.6.0",
"litellm<2.0.0,>=1.35.31",
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
"xmltodict<0.15,>=0.13",
"groq<0.14.0,>=0.4.2",
"phonenumbers<9.0.0,>=8.13.33",
17 changes: 9 additions & 8 deletions tests/llm/test_anthropic/test_multimodal.py
@@ -12,6 +12,9 @@ class ImageDescription(BaseModel):
colors: list[str] = Field(..., description="The colors in the image")


image_url = "https://github.com/google-gemini/cookbook/blob/main/examples/assets/castle.png?raw=true"


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_multimodal_image_description(model, mode, client):
client = instructor.from_anthropic(client, mode=mode)
@@ -27,9 +30,7 @@ def test_multimodal_image_description(model, mode, client):
"role": "user",
"content": [
"What is this?",
Image.from_url(
"https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg"
),
Image.from_url(image_url),
],
},
],
@@ -59,7 +60,7 @@ def test_multimodal_image_description_autodetect(model, mode, client):
"role": "user",
"content": [
"What is this?",
"https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
image_url,
],
},
],
@@ -94,7 +95,7 @@ def test_multimodal_image_description_autodetect_image_params(model, mode, clien
"What is this?",
{
"type": "image",
"source": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"source": image_url,
},
],
},
@@ -117,7 +118,7 @@ def test_multimodal_image_description_autodetect_image_params_cache(
def test_multimodal_image_description_autodetect_image_params_cache(
model, mode, client
):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
messages = client.chat.completions.create(
model=model, # Ensure this is a vision-capable model
response_model=None,
@@ -174,7 +175,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode,
},
{
"role": "user",
"content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"content": image_url,
},
],
max_tokens=1000,
@@ -192,7 +193,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode,
messages=[
{
"role": "user",
"content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"content": image_url,
},
],
max_tokens=1000,
4 changes: 2 additions & 2 deletions tests/llm/test_anthropic/test_system.py
@@ -41,7 +41,7 @@ def test_creation(model, mode, client):

@pytest.mark.parametrize("model, mode", product(models, modes))
def test_creation_with_system_cache(model, mode, client):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
response, message = client.chat.completions.create_with_completion(
model=model,
response_model=User,
@@ -83,7 +83,7 @@ def test_creation_with_system_cache(model, mode, client):

@pytest.mark.parametrize("model, mode", product(models, modes))
def test_creation_with_system_cache_anthropic_style(model, mode, client):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
response, message = client.chat.completions.create_with_completion(
model=model,
system=[