Migrating Anthropic (#1281)
ivanleomk authored Dec 26, 2024
1 parent f484c9f commit e476bc5
Showing 9 changed files with 95 additions and 67 deletions.
62 changes: 35 additions & 27 deletions docs/concepts/prompt_caching.md
@@ -17,23 +17,16 @@ This optimization is especially useful for applications making multiple API call

Prompt Caching is enabled for the following models:

* gpt-4o
* gpt-4o-mini
* o1-preview
* o1-mini
- gpt-4o
- gpt-4o-mini
- o1-preview
- o1-mini

Caching is based on prefix matching, so if you're using a system prompt that contains a common set of instructions, you're likely to see a cache hit as long as you move all variable parts of the prompt to the end of the message when possible.
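
For illustration, here is a minimal sketch (not part of this commit; the names, prompt text, and model choice are placeholders) of structuring requests so repeated calls share a cacheable prefix, and of checking whether that prefix was reused:

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel

client = instructor.from_openai(OpenAI())


class Summary(BaseModel):
    summary: str


# Static, shared instructions go first so every request starts with the same
# prefix. OpenAI only caches sufficiently long prompts, so in practice this
# shared prefix needs to be on the order of 1024+ tokens.
shared_instructions = "You are a careful analyst. <long, unchanging instructions>"


def summarize(question: str) -> Summary:
    resp, completion = client.chat.completions.create_with_completion(
        model="gpt-4o-mini",
        response_model=Summary,
        messages=[
            {"role": "system", "content": shared_instructions},  # cacheable prefix
            {"role": "user", "content": question},  # variable part goes last
        ],
    )
    # On a cache hit the reused prefix is reported here (the field can be
    # None or 0 when nothing was cached).
    if completion.usage and completion.usage.prompt_tokens_details:
        print(completion.usage.prompt_tokens_details.cached_tokens)
    return resp
```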


## Prompt Caching in Anthropic

The `anthropic.beta.prompt_caching.messages.create` method enables you to:

1. Cache specific prompt portions
2. Reuse cached content in subsequent calls
3. Reduce processed data per request

By implementing prompt caching, you can potentially enhance efficiency and reduce costs, especially when dealing with large, shared contexts across multiple API interactions.
Prompt Caching is now generally available for Anthropic. This enables you to cache specific portions of a prompt, reuse cached content in subsequent calls, and reduce the amount of data processed per request.

??? note "Source Text"

@@ -182,18 +175,11 @@ By implementing prompt caching, you can potentially enhance efficiency and reduc
```

```python
from instructor import Instructor, Mode, patch
import instructor
from anthropic import Anthropic
from pydantic import BaseModel

client = Instructor( # (1)!
client=Anthropic(),
create=patch(
create=Anthropic().beta.prompt_caching.messages.create,
mode=Mode.ANTHROPIC_TOOLS,
),
mode=Mode.ANTHROPIC_TOOLS,
)
client = instructor.from_anthropic(Anthropic())


class Character(BaseModel):
@@ -204,16 +190,16 @@ class Character(BaseModel):
with open("./book.txt") as f:
book = f.read()

resp = client.chat.completions.create(
model="claude-3-haiku-20240307",
resp, completion = client.chat.completions.create_with_completion(
model="claude-3-5-sonnet-20240620",
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "<book>" + book + "</book>",
"cache_control": {"type": "ephemeral"}, # (2)!
"cache_control": {"type": "ephemeral"}, #(1)!
},
{
"type": "text",
@@ -225,11 +211,33 @@ resp = client.chat.completions.create(
response_model=Character,
max_tokens=1000,
)
```

1. Since the feature is still in beta, we need to manually pass in the function that we're looking to patch.
print(completion)
# Message(
# id='msg_01QcqjktYc1PXL8nk7y5hkMV',
# content=[
# ToolUseBlock(
# id='toolu_019wABRzQxtSbXeuuRwvJo15',
# input={
# 'name': 'Jane Austen',
# 'description': 'A renowned English novelist of the early 19th century, known for her wit, humor, and keen observations of human nature. She is the author of
# several classic novels including "Pride and Prejudice," "Emma," "Sense and Sensibility," and "Mansfield Park." Austen\'s writing is characterized by its subtlety, delicate touch,
# and ability to create memorable characters. Her work often involves social commentary and explores themes of love, marriage, and societal expectations in Regency-era England.'
# },
# name='Character',
# type='tool_use'
# )
# ],
# model='claude-3-5-sonnet-20240620',
# role='assistant',
# stop_reason='tool_use',
# stop_sequence=None,
# type='message',
# usage=Usage(cache_creation_input_tokens=2777, cache_read_input_tokens=0, input_tokens=30, output_tokens=161)
# )
```

2. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
1. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
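
To confirm the cache is actually being reused, a sketch like the following (continuing the example above; the follow-up question text and the exact token counts are illustrative) issues the same request twice and compares the usage blocks:

```python
cached_messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "<book>" + book + "</book>",
                "cache_control": {"type": "ephemeral"},
            },
            {
                "type": "text",
                "text": "Extract a character from the text given above",
            },
        ],
    }
]

# The first call writes the large prefix to the cache...
_, first = client.chat.completions.create_with_completion(
    model="claude-3-5-sonnet-20240620",
    messages=cached_messages,
    response_model=Character,
    max_tokens=1000,
)

# ...and a second call with an identical prefix should read from it while
# the cache entry is still warm (entries expire after roughly five minutes).
_, second = client.chat.completions.create_with_completion(
    model="claude-3-5-sonnet-20240620",
    messages=cached_messages,
    response_model=Character,
    max_tokens=1000,
)

print(first.usage.cache_creation_input_tokens, first.usage.cache_read_input_tokens)
print(second.usage.cache_creation_input_tokens, second.usage.cache_read_input_tokens)
# Roughly (N, 0) on the first call and (0, N) on the second.
```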

!!! Warning "Caching Considerations"

10 changes: 1 addition & 9 deletions instructor/client_anthropic.py
@@ -42,7 +42,6 @@ def from_anthropic(
| anthropic.AnthropicVertex
),
mode: instructor.Mode = instructor.Mode.ANTHROPIC_TOOLS,
enable_prompt_caching: bool = False,
beta: bool = False,
**kwargs: Any,
) -> instructor.Instructor | instructor.AsyncInstructor:
@@ -82,14 +81,7 @@
),
), "Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic, anthropic.AnthropicBedrock, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicVertex, anthropic.AsyncAnthropicVertex}"

if enable_prompt_caching:
if isinstance(client, (anthropic.Anthropic, anthropic.AsyncAnthropic)):
create = client.beta.prompt_caching.messages.create
else:
raise TypeError(
"Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic} to enable prompt caching"
)
elif beta:
if beta:
create = client.beta.messages.create
else:
create = client.messages.create
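For downstream callers, the migration this implies is sketched below (illustrative, not part of the diff): the client-level flag is gone, and caching is requested per message instead.

```python
import anthropic
import instructor

# Before this commit (no longer supported):
# client = instructor.from_anthropic(anthropic.Anthropic(), enable_prompt_caching=True)

# After: construct the client normally; caching is opted into per message via
# "cache_control" blocks, and beta=True routes to the beta Messages API when
# needed.
client = instructor.from_anthropic(anthropic.Anthropic())
```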
6 changes: 2 additions & 4 deletions instructor/reask.py
@@ -23,9 +23,8 @@ def reask_anthropic_tools(
):
kwargs = kwargs.copy()
from anthropic.types import Message
from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage

assert isinstance(response, Message) or isinstance(response, PromptCachingBetaMessage), "Response must be a Anthropic Message"
assert isinstance(response, Message), "Response must be a Anthropic Message"

assistant_content = []
tool_use_id = None
@@ -71,9 +70,8 @@ def reask_anthropic_json(
):
kwargs = kwargs.copy()
from anthropic.types import Message
from anthropic.types.beta.prompt_caching import PromptCachingBetaMessage

assert isinstance(response, Message) or isinstance(response, PromptCachingBetaMessage), "Response must be a Anthropic Message"
assert isinstance(response, Message), "Response must be a Anthropic Message"

reask_msg = {
"role": "user",
24 changes: 19 additions & 5 deletions instructor/retry.py
@@ -14,7 +14,11 @@
from instructor.utils import update_total_usage
from instructor.validators import AsyncValidationError
from openai.types.chat import ChatCompletion
from openai.types.completion_usage import CompletionUsage, CompletionTokensDetails, PromptTokensDetails
from openai.types.completion_usage import (
CompletionUsage,
CompletionTokensDetails,
PromptTokensDetails,
)
from pydantic import BaseModel, ValidationError
from tenacity import (
AsyncRetrying,
@@ -71,14 +75,24 @@ def initialize_usage(mode: Mode) -> CompletionUsage | Any:
Returns:
CompletionUsage | Any: Initialized usage object.
"""
total_usage = CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0,
completion_tokens_details = CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0),
prompt_tokens_details = PromptTokensDetails(audio_tokens=0, cached_tokens=0)
total_usage = CompletionUsage(
completion_tokens=0,
prompt_tokens=0,
total_tokens=0,
completion_tokens_details=CompletionTokensDetails(
audio_tokens=0, reasoning_tokens=0
),
prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0),
)
if mode in {Mode.ANTHROPIC_TOOLS, Mode.ANTHROPIC_JSON}:
from anthropic.types import Usage as AnthropicUsage

total_usage = AnthropicUsage(input_tokens=0, output_tokens=0)
total_usage = AnthropicUsage(
input_tokens=0,
output_tokens=0,
cache_read_input_tokens=0,
cache_creation_input_tokens=0,
)
return total_usage


6 changes: 6 additions & 0 deletions instructor/utils.py
@@ -169,6 +169,12 @@ def update_total_usage(
):
total_usage.input_tokens += response_usage.input_tokens or 0
total_usage.output_tokens += response_usage.output_tokens or 0
total_usage.cache_creation_input_tokens += (
response_usage.cache_creation_input_tokens or 0
)
total_usage.cache_read_input_tokens += (
response_usage.cache_read_input_tokens or 0
)
response.usage = total_usage
return response
except ImportError:
7 changes: 4 additions & 3 deletions pyproject.toml
@@ -56,7 +56,7 @@ test-docs = [
"mistralai<2.0.0,>=1.0.3",
]
anthropic = [
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
"xmltodict<0.15,>=0.13",
]
groq = [
@@ -106,9 +106,10 @@ docs = [
"mkdocs-rss-plugin<2.0.0,>=1.12.0",
"mkdocs-minify-plugin<1.0.0,>=0.8.0",
"mkdocs-redirects<2.0.0,>=1.2.1",
"material>=0.1",
]
anthropic = [
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
]
test-docs = [
"fastapi<0.116.0,>=0.109.2",
@@ -118,7 +119,7 @@ test-docs = [
"tabulate<1.0.0,>=0.9.0",
"pydantic-extra-types<3.0.0,>=2.6.0",
"litellm<2.0.0,>=1.35.31",
"anthropic<0.41.0,>=0.36.2",
"anthropic==0.42.0",
"xmltodict<0.15,>=0.13",
"groq<0.14.0,>=0.4.2",
"phonenumbers<9.0.0,>=8.13.33",
17 changes: 9 additions & 8 deletions tests/llm/test_anthropic/test_multimodal.py
@@ -12,6 +12,9 @@ class ImageDescription(BaseModel):
colors: list[str] = Field(..., description="The colors in the image")


image_url = "https://github.com/google-gemini/cookbook/blob/main/examples/assets/castle.png?raw=true"


@pytest.mark.parametrize("model, mode", product(models, modes))
def test_multimodal_image_description(model, mode, client):
client = instructor.from_anthropic(client, mode=mode)
@@ -27,9 +30,7 @@ def test_multimodal_image_description(model, mode, client):
"role": "user",
"content": [
"What is this?",
Image.from_url(
"https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg"
),
Image.from_url(image_url),
],
},
],
@@ -59,7 +60,7 @@ def test_multimodal_image_description_autodetect(model, mode, client):
"role": "user",
"content": [
"What is this?",
"https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
image_url,
],
},
],
@@ -94,7 +95,7 @@ def test_multimodal_image_description_autodetect_image_params(model, mode, clien
"What is this?",
{
"type": "image",
"source": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"source": image_url,
},
],
},
@@ -117,7 +118,7 @@ def test_multimodal_image_description_autodetect_image_params_cache(
def test_multimodal_image_description_autodetect_image_params_cache(
model, mode, client
):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
messages = client.chat.completions.create(
model=model, # Ensure this is a vision-capable model
response_model=None,
@@ -174,7 +175,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode,
},
{
"role": "user",
"content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"content": image_url,
},
],
max_tokens=1000,
@@ -192,7 +193,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode,
messages=[
{
"role": "user",
"content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg",
"content": image_url,
},
],
max_tokens=1000,
4 changes: 2 additions & 2 deletions tests/llm/test_anthropic/test_system.py
@@ -41,7 +41,7 @@ def test_creation(model, mode, client):

@pytest.mark.parametrize("model, mode", product(models, modes))
def test_creation_with_system_cache(model, mode, client):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
response, message = client.chat.completions.create_with_completion(
model=model,
response_model=User,
@@ -83,7 +83,7 @@ def test_creation_with_system_cache(model, mode, client):

@pytest.mark.parametrize("model, mode", product(models, modes))
def test_creation_with_system_cache_anthropic_style(model, mode, client):
client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True)
client = instructor.from_anthropic(client, mode=mode)
response, message = client.chat.completions.create_with_completion(
model=model,
system=[