diff --git a/docs/concepts/prompt_caching.md b/docs/concepts/prompt_caching.md
index 341340846..4452a52fb 100644
--- a/docs/concepts/prompt_caching.md
+++ b/docs/concepts/prompt_caching.md
@@ -17,23 +17,16 @@ This optimization is especially useful for applications making multiple API call
 
 Prompt Caching is enabled for the following models:
 
-* gpt-4o
-* gpt-4o-mini
-* o1-preview
-* o1-mini
+- gpt-4o
+- gpt-4o-mini
+- o1-preview
+- o1-mini
 
 Caching is based on prefix matching, so if you're using a system prompt that contains a common set of instructions, you're likely to see a cache hit as long as you move all variable parts of the prompt to the end of the message when possible.
 
-
 ## Prompt Caching in Anthropic
 
-The `anthropic.beta.prompt_caching.messages.create` method enables you to:
-
-1. Cache specific prompt portions
-2. Reuse cached content in subsequent calls
-3. Reduce processed data per request
-
-By implementing prompt caching, you can potentially enhance efficiency and reduce costs, especially when dealing with large, shared contexts across multiple API interactions.
+Prompt Caching is now generally available for Anthropic. This enables you to cache specific prompt portions, reuse cached content in subsequent calls, and reduce processed data per request.
 
 ??? note "Source Text"
 
@@ -182,18 +175,11 @@ By implementing prompt caching, you can potentially enhance efficiency and reduc
     ```
 
 ```python
-from instructor import Instructor, Mode, patch
+import instructor
 from anthropic import Anthropic
 from pydantic import BaseModel
 
-client = Instructor(  # (1)!
-    client=Anthropic(),
-    create=patch(
-        create=Anthropic().beta.prompt_caching.messages.create,
-        mode=Mode.ANTHROPIC_TOOLS,
-    ),
-    mode=Mode.ANTHROPIC_TOOLS,
-)
+client = instructor.from_anthropic(Anthropic())
 
 
 class Character(BaseModel):
@@ -204,8 +190,8 @@ class Character(BaseModel):
 with open("./book.txt") as f:
     book = f.read()
 
-resp = client.chat.completions.create(
-    model="claude-3-haiku-20240307",
+resp, completion = client.chat.completions.create_with_completion(
+    model="claude-3-5-sonnet-20240620",
     messages=[
         {
             "role": "user",
@@ -213,7 +199,7 @@ resp = client.chat.completions.create(
                 {
                     "type": "text",
                     "text": "<book>" + book + "</book>",
-                    "cache_control": {"type": "ephemeral"},  # (2)!
+                    "cache_control": {"type": "ephemeral"},  # (1)!
                 },
                 {
                     "type": "text",
@@ -225,11 +211,33 @@ resp = client.chat.completions.create(
     response_model=Character,
     max_tokens=1000,
 )
-```
 
-1. Since the feature is still in beta, we need to manually pass in the function that we're looking to patch.
+print(completion)
+# Message(
+#     id='msg_01QcqjktYc1PXL8nk7y5hkMV',
+#     content=[
+#         ToolUseBlock(
+#             id='toolu_019wABRzQxtSbXeuuRwvJo15',
+#             input={
+#                 'name': 'Jane Austen',
+#                 'description': 'A renowned English novelist of the early 19th century, known for her wit, humor, and keen observations of human nature. She is the author of
+# several classic novels including "Pride and Prejudice," "Emma," "Sense and Sensibility," and "Mansfield Park." Austen\'s writing is characterized by its subtlety, delicate touch,
+# and ability to create memorable characters. Her work often involves social commentary and explores themes of love, marriage, and societal expectations in Regency-era England.'
+#             },
+#             name='Character',
+#             type='tool_use'
+#         )
+#     ],
+#     model='claude-3-5-sonnet-20240620',
+#     role='assistant',
+#     stop_reason='tool_use',
+#     stop_sequence=None,
+#     type='message',
+#     usage=Usage(cache_creation_input_tokens=2777, cache_read_input_tokens=0, input_tokens=30, output_tokens=161)
+# )
+```
 
-2. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
+1. Anthropic requires that you explicitly pass in the `cache_control` parameter to indicate that you want to cache the content.
 
 !!! Warning "Caching Considerations"
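Because OpenAI-side caching keys on a byte-identical prompt prefix, the advice in the OpenAI section above amounts to keeping the shared instructions first and appending only the variable input. A minimal sketch of that ordering, assuming a hypothetical `Summary` model, prompt text, and model choice that are not part of this change:

```python
import instructor
from openai import OpenAI
from pydantic import BaseModel

client = instructor.from_openai(OpenAI())


class Summary(BaseModel):
    summary: str


# The instruction block is identical on every call, so it forms a stable,
# cacheable prefix; only the final user message varies per request.
SYSTEM_PROMPT = "You are a careful summarizer. <long, shared instructions>"


def summarize(document: str) -> Summary:
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=Summary,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            # Variable content goes last so the shared prefix is preserved.
            {"role": "user", "content": document},
        ],
    )
```

No `cache_control` markers are involved on the OpenAI side; caching applies automatically once the shared prefix is long enough to qualify.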
diff --git a/instructor/client_anthropic.py b/instructor/client_anthropic.py
index 0860e1a42..9caf37cdc 100644
--- a/instructor/client_anthropic.py
+++ b/instructor/client_anthropic.py
@@ -42,7 +42,6 @@ def from_anthropic(
         | anthropic.AnthropicVertex
     ),
     mode: instructor.Mode = instructor.Mode.ANTHROPIC_TOOLS,
-    enable_prompt_caching: bool = False,
     beta: bool = False,
     **kwargs: Any,
 ) -> instructor.Instructor | instructor.AsyncInstructor:
@@ -82,14 +81,7 @@ def from_anthropic(
         ),
     ), "Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic, anthropic.AnthropicBedrock, anthropic.AsyncAnthropicBedrock, anthropic.AnthropicVertex, anthropic.AsyncAnthropicVertex}"
 
-    if enable_prompt_caching:
-        if isinstance(client, (anthropic.Anthropic, anthropic.AsyncAnthropic)):
-            create = client.beta.prompt_caching.messages.create
-        else:
-            raise TypeError(
-                "Client must be an instance of {anthropic.Anthropic, anthropic.AsyncAnthropic} to enable prompt caching"
-            )
-    elif beta:
+    if beta:
         create = client.beta.messages.create
     else:
         create = client.messages.create
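With `enable_prompt_caching` removed from `from_anthropic`, caching is requested purely through message content and no special client construction remains. A minimal before/after sketch of a call site, assuming an illustrative `Summary` model and placeholder text:

```python
import instructor
from anthropic import Anthropic
from pydantic import BaseModel


class Summary(BaseModel):
    summary: str


# Before this change, instructor.from_anthropic(Anthropic(), enable_prompt_caching=True)
# routed calls to the beta prompt-caching endpoint. The default endpoint now
# accepts cache markers directly, so the plain constructor is enough.
client = instructor.from_anthropic(Anthropic())

resp = client.chat.completions.create(
    model="claude-3-5-sonnet-20240620",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "<the large, shared context goes here>",
                    # Caching is opted into per content block:
                    "cache_control": {"type": "ephemeral"},
                },
                {"type": "text", "text": "Summarize the shared context."},
            ],
        }
    ],
    response_model=Summary,
    max_tokens=1000,
)
```

Existing call sites only need to drop the flag; the `cache_control` markers inside message content are unchanged.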
""" - total_usage = CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0, - completion_tokens_details = CompletionTokensDetails(audio_tokens=0, reasoning_tokens=0), - prompt_tokens_details = PromptTokensDetails(audio_tokens=0, cached_tokens=0) + total_usage = CompletionUsage( + completion_tokens=0, + prompt_tokens=0, + total_tokens=0, + completion_tokens_details=CompletionTokensDetails( + audio_tokens=0, reasoning_tokens=0 + ), + prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0), ) if mode in {Mode.ANTHROPIC_TOOLS, Mode.ANTHROPIC_JSON}: from anthropic.types import Usage as AnthropicUsage - total_usage = AnthropicUsage(input_tokens=0, output_tokens=0) + total_usage = AnthropicUsage( + input_tokens=0, + output_tokens=0, + cache_read_input_tokens=0, + cache_creation_input_tokens=0, + ) return total_usage diff --git a/instructor/utils.py b/instructor/utils.py index 55d746760..efdb590a3 100644 --- a/instructor/utils.py +++ b/instructor/utils.py @@ -169,6 +169,12 @@ def update_total_usage( ): total_usage.input_tokens += response_usage.input_tokens or 0 total_usage.output_tokens += response_usage.output_tokens or 0 + total_usage.cache_creation_input_tokens += ( + response_usage.cache_creation_input_tokens or 0 + ) + total_usage.cache_read_input_tokens += ( + response_usage.cache_read_input_tokens or 0 + ) response.usage = total_usage return response except ImportError: diff --git a/pyproject.toml b/pyproject.toml index 9cdba29eb..a81b1e4ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ test-docs = [ "mistralai<2.0.0,>=1.0.3", ] anthropic = [ - "anthropic<0.41.0,>=0.36.2", + "anthropic==0.42.0", "xmltodict<0.15,>=0.13", ] groq = [ @@ -106,9 +106,10 @@ docs = [ "mkdocs-rss-plugin<2.0.0,>=1.12.0", "mkdocs-minify-plugin<1.0.0,>=0.8.0", "mkdocs-redirects<2.0.0,>=1.2.1", + "material>=0.1", ] anthropic = [ - "anthropic<0.41.0,>=0.36.2", + "anthropic==0.42.0", ] test-docs = [ "fastapi<0.116.0,>=0.109.2", @@ -118,7 +119,7 @@ test-docs = [ "tabulate<1.0.0,>=0.9.0", "pydantic-extra-types<3.0.0,>=2.6.0", "litellm<2.0.0,>=1.35.31", - "anthropic<0.41.0,>=0.36.2", + "anthropic==0.42.0", "xmltodict<0.15,>=0.13", "groq<0.14.0,>=0.4.2", "phonenumbers<9.0.0,>=8.13.33", diff --git a/tests/llm/test_anthropic/test_multimodal.py b/tests/llm/test_anthropic/test_multimodal.py index 5cbf311d2..991ea304d 100644 --- a/tests/llm/test_anthropic/test_multimodal.py +++ b/tests/llm/test_anthropic/test_multimodal.py @@ -12,6 +12,9 @@ class ImageDescription(BaseModel): colors: list[str] = Field(..., description="The colors in the image") +image_url = "https://github.com/google-gemini/cookbook/blob/main/examples/assets/castle.png?raw=true" + + @pytest.mark.parametrize("model, mode", product(models, modes)) def test_multimodal_image_description(model, mode, client): client = instructor.from_anthropic(client, mode=mode) @@ -27,9 +30,7 @@ def test_multimodal_image_description(model, mode, client): "role": "user", "content": [ "What is this?", - Image.from_url( - "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg" - ), + Image.from_url(image_url), ], }, ], @@ -59,7 +60,7 @@ def test_multimodal_image_description_autodetect(model, mode, client): "role": "user", "content": [ "What is this?", - "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg", + image_url, ], }, ], @@ -94,7 +95,7 @@ def test_multimodal_image_description_autodetect_image_params(model, mode, clien "What is this?", { "type": "image", - "source": 
"https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg", + "source": image_url, }, ], }, @@ -117,7 +118,7 @@ def test_multimodal_image_description_autodetect_image_params(model, mode, clien def test_multimodal_image_description_autodetect_image_params_cache( model, mode, client ): - client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True) + client = instructor.from_anthropic(client, mode=mode) messages = client.chat.completions.create( model=model, # Ensure this is a vision-capable model response_model=None, @@ -174,7 +175,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode, }, { "role": "user", - "content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg", + "content": image_url, }, ], max_tokens=1000, @@ -192,7 +193,7 @@ def test_multimodal_image_description_autodetect_no_response_model(model, mode, messages=[ { "role": "user", - "content": "https://pbs.twimg.com/profile_images/1816950591857233920/ZBxrWCbX_400x400.jpg", + "content": image_url, }, ], max_tokens=1000, diff --git a/tests/llm/test_anthropic/test_system.py b/tests/llm/test_anthropic/test_system.py index eb3158902..305a0df8e 100644 --- a/tests/llm/test_anthropic/test_system.py +++ b/tests/llm/test_anthropic/test_system.py @@ -41,7 +41,7 @@ def test_creation(model, mode, client): @pytest.mark.parametrize("model, mode", product(models, modes)) def test_creation_with_system_cache(model, mode, client): - client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True) + client = instructor.from_anthropic(client, mode=mode) response, message = client.chat.completions.create_with_completion( model=model, response_model=User, @@ -83,7 +83,7 @@ def test_creation_with_system_cache(model, mode, client): @pytest.mark.parametrize("model, mode", product(models, modes)) def test_creation_with_system_cache_anthropic_style(model, mode, client): - client = instructor.from_anthropic(client, mode=mode, enable_prompt_caching=True) + client = instructor.from_anthropic(client, mode=mode) response, message = client.chat.completions.create_with_completion( model=model, system=[ diff --git a/uv.lock b/uv.lock index fd718d732..f9ce7ff48 100644 --- a/uv.lock +++ b/uv.lock @@ -132,7 +132,7 @@ wheels = [ [[package]] name = "anthropic" -version = "0.40.0" +version = "0.42.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -143,9 +143,9 @@ dependencies = [ { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4d/d9/c39005f04c602607d68d48d1c917b35af8d16b687b7ca427ca787c39d8b9/anthropic-0.40.0.tar.gz", hash = "sha256:3efeca6d9e97813f93ed34322c6c7ea2279bf0824cd0aa71b59ce222665e2b87", size = 190939 } +sdist = { url = "https://files.pythonhosted.org/packages/e7/7c/91b79f5ae4a52497a4e330d66ea5929aec2878ee2c9f8a998dbe4f4c7f01/anthropic-0.42.0.tar.gz", hash = "sha256:bf8b0ed8c8cb2c2118038f29c58099d2f99f7847296cafdaa853910bfff4edf4", size = 192361 } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/18/a68cfb9a11990377650c36c25b5dfb0baece900e9e505b68e1aa06ad0227/anthropic-0.40.0-py3-none-any.whl", hash = "sha256:442028ae8790ff9e3b6f8912043918755af1230d193904ae2ef78cc22995280c", size = 199484 }, + { url = "https://files.pythonhosted.org/packages/ba/33/b907a6d27dd0d8d3adb4edb5c9e9c85a189719ec6855051cce3814c8ef13/anthropic-0.42.0-py3-none-any.whl", hash = "sha256:46775f65b723c078a2ac9e9de44a46db5c6a4fabeacfd165e5ea78e6817f4eff", 
diff --git a/uv.lock b/uv.lock
index fd718d732..f9ce7ff48 100644
--- a/uv.lock
+++ b/uv.lock
@@ -132,7 +132,7 @@ wheels = [
 
 [[package]]
 name = "anthropic"
-version = "0.40.0"
+version = "0.42.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -143,9 +143,9 @@ dependencies = [
     { name = "sniffio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/4d/d9/c39005f04c602607d68d48d1c917b35af8d16b687b7ca427ca787c39d8b9/anthropic-0.40.0.tar.gz", hash = "sha256:3efeca6d9e97813f93ed34322c6c7ea2279bf0824cd0aa71b59ce222665e2b87", size = 190939 }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/7c/91b79f5ae4a52497a4e330d66ea5929aec2878ee2c9f8a998dbe4f4c7f01/anthropic-0.42.0.tar.gz", hash = "sha256:bf8b0ed8c8cb2c2118038f29c58099d2f99f7847296cafdaa853910bfff4edf4", size = 192361 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/cb/18/a68cfb9a11990377650c36c25b5dfb0baece900e9e505b68e1aa06ad0227/anthropic-0.40.0-py3-none-any.whl", hash = "sha256:442028ae8790ff9e3b6f8912043918755af1230d193904ae2ef78cc22995280c", size = 199484 },
+    { url = "https://files.pythonhosted.org/packages/ba/33/b907a6d27dd0d8d3adb4edb5c9e9c85a189719ec6855051cce3814c8ef13/anthropic-0.42.0-py3-none-any.whl", hash = "sha256:46775f65b723c078a2ac9e9de44a46db5c6a4fabeacfd165e5ea78e6817f4eff", size = 203365 },
 ]
 
 [[package]]
@@ -1664,6 +1664,7 @@ dev = [
     { name = "pytest-examples" },
 ]
 docs = [
+    { name = "material" },
     { name = "mkdocs" },
     { name = "mkdocs-jupyter" },
     { name = "mkdocs-material", extra = ["imaging"] },
@@ -1712,7 +1713,7 @@ vertexai = [
 [package.metadata]
 requires-dist = [
     { name = "aiohttp", specifier = ">=3.9.1,<4.0.0" },
-    { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.36.2,<0.41.0" },
+    { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.42.0" },
     { name = "cerebras-cloud-sdk", marker = "extra == 'cerebras-cloud-sdk'", specifier = ">=1.5.0,<2.0.0" },
     { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.1.8,<6.0.0" },
     { name = "diskcache", marker = "extra == 'test-docs'", specifier = ">=5.6.3,<6.0.0" },
@@ -1744,7 +1745,7 @@ requires-dist = [
 ]
 
 [package.metadata.requires-dev]
-anthropic = [{ name = "anthropic", specifier = ">=0.36.2,<0.41.0" }]
+anthropic = [{ name = "anthropic", specifier = "==0.42.0" }]
 dev = [
     { name = "black", specifier = ">=24.10.0,<25.0.0" },
     { name = "coverage", specifier = ">=7.3.2,<8.0.0" },
@@ -1755,6 +1756,7 @@ dev = [
     { name = "pytest-examples", specifier = ">=0.0.15" },
 ]
 docs = [
+    { name = "material", specifier = ">=0.1" },
     { name = "mkdocs", specifier = ">=1.4.3,<2.0.0" },
     { name = "mkdocs-jupyter", specifier = ">=0.24.6,<0.26.0" },
     { name = "mkdocs-material", extras = ["imaging"], specifier = ">=9.5.9,<10.0.0" },
@@ -1771,7 +1773,7 @@ google-generativeai = [
 ]
 litellm = [{ name = "litellm", specifier = ">=1.35.31,<2.0.0" }]
 test-docs = [
-    { name = "anthropic", specifier = ">=0.36.2,<0.41.0" },
+    { name = "anthropic", specifier = "==0.42.0" },
     { name = "cerebras-cloud-sdk", specifier = ">=1.5.0,<2.0.0" },
     { name = "cohere", specifier = ">=5.1.8,<6.0.0" },
     { name = "datasets", specifier = ">=3.0.1,<4.0.0" },
@@ -2061,7 +2063,7 @@ wheels = [
 
 [[package]]
 name = "litellm"
-version = "1.55.11"
+version = "1.55.12"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "aiohttp" },
@@ -2076,9 +2078,9 @@ dependencies = [
     { name = "tiktoken" },
     { name = "tokenizers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/82/09/347533465a145359b909dc262356960b1c70c892f181b1c0ee8fed947b67/litellm-1.55.11.tar.gz", hash = "sha256:f2a90a157ba93797c86deb9b41b9f34a47714a9e04118fc2da7886f3c5581bfc", size = 6207717 }
+sdist = { url = "https://files.pythonhosted.org/packages/50/0e/1704168f8f658df61fbfdc0fa5d8a9ce8350616dc74b4481da2284a8f4c9/litellm-1.55.12.tar.gz", hash = "sha256:6d93025ebf29ae3d6929a90c44d648ba6804fdbc1bb1c3a72d7dddd08f6229dc", size = 6209840 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/fd/55/8d10bd070cdd2978e4066a1578fb1cd7269d25fb74db3a2867839ac195b0/litellm-1.55.11-py3-none-any.whl", hash = "sha256:1631f060cd625cd1097c5383c71693f4b9b95a8e761b726d4863a9c325a2fa50", size = 6484499 },
+    { url = "https://files.pythonhosted.org/packages/30/5a/f3b621795e2288d8dc3d663a0db030700ee6cc127a6bb1ccc55ace061fbb/litellm-1.55.12-py3-none-any.whl", hash = "sha256:d034c3d6cdd5b8dd2c56f1100eb14f41f920b7673ac943d38fb2d7155ae5774b", size = 6488096 },
 ]
 
 [[package]]
@@ -2295,6 +2297,12 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/73/085399401383ce949f727afec55ec3abd76648d04b9f22e1c0e99cb4bec3/MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a", size = 15506 },
 ]
 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6b/df/39108218c4d4da0e2bbbefb241bf00dcaaaf6622963a886e65042d26a797/material-0.1.tar.gz", hash = "sha256:99a6a68527b0faff78d9ae85d3838d7eb350922f8b15d536945c515cd8418c42", size = 1182 } + [[package]] name = "matplotlib-inline" version = "0.1.7"