From 7a0b8798c704bf4d05be8e89649289a9e47977cc Mon Sep 17 00:00:00 2001
From: iakuf <iakuf@gmail.com>
Date: Thu, 26 Feb 2026 22:10:32 +0800
Subject: [PATCH 1/3] fix(ai-proxy): support Anthropic token field names in
 openai-base driver

When the openai-compatible provider is pointed at an Anthropic-format
endpoint (e.g. DeepSeek's /anthropic/v1/messages), the usage object in
the response carries input_tokens/output_tokens instead of
prompt_tokens/completion_tokens.

This patch reads prompt_tokens/completion_tokens first and falls back to
input_tokens/output_tokens when they are absent, in both the streaming
and non-streaming paths, so token usage statistics work correctly
regardless of which format the upstream LLM returns.

Fixes token stats being 0 when proxying to Anthropic-compatible endpoints.
---
 apisix/plugins/ai-drivers/openai-base.lua | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
index 4f279bbc3eab..6fc7e8bcb36d 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -132,8 +132,10 @@ local function read_response(conf, ctx, res, response_filter)
                                             core.json.delay_encode(data.usage))
                         ctx.llm_raw_usage = data.usage
                         ctx.ai_token_usage = {
-                            prompt_tokens = data.usage.prompt_tokens or 0,
-                            completion_tokens = data.usage.completion_tokens or 0,
+                            prompt_tokens = data.usage.prompt_tokens
+                                            or data.usage.input_tokens or 0,
+                            completion_tokens = data.usage.completion_tokens
+                                               or data.usage.output_tokens or 0,
                             total_tokens = data.usage.total_tokens or 0,
                         }
                         ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens
@@ -188,8 +190,10 @@ local function read_response(conf, ctx, res, response_filter)
         ctx.ai_token_usage = {}
         if type(res_body.usage) == "table" then
             ctx.llm_raw_usage = res_body.usage
-            ctx.ai_token_usage.prompt_tokens = res_body.usage.prompt_tokens or 0
-            ctx.ai_token_usage.completion_tokens = res_body.usage.completion_tokens or 0
+            ctx.ai_token_usage.prompt_tokens = res_body.usage.prompt_tokens
+                                               or res_body.usage.input_tokens or 0
+            ctx.ai_token_usage.completion_tokens = res_body.usage.completion_tokens
+                                                   or res_body.usage.output_tokens or 0
             ctx.ai_token_usage.total_tokens = res_body.usage.total_tokens or 0
         end
         ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens or 0

From 03608d902f03ff15ca465b7594bea97ab5eeb667 Mon Sep 17 00:00:00 2001
From: iakuf <iakuf@gmail.com>
Date: Thu, 26 Feb 2026 22:30:32 +0800
Subject: [PATCH 2/3] fix(ai-proxy): compute total_tokens fallback for
 Anthropic format in streaming path

---
 apisix/plugins/ai-drivers/openai-base.lua | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
index 6fc7e8bcb36d..5e23c1c995d6 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -131,12 +131,12 @@ local function read_response(conf, ctx, res, response_filter)
                         core.log.info("got token usage from ai service: ",
                                             core.json.delay_encode(data.usage))
                         ctx.llm_raw_usage = data.usage
+                        local pt = data.usage.prompt_tokens or data.usage.input_tokens or 0
+                        local ct = data.usage.completion_tokens or data.usage.output_tokens or 0
                         ctx.ai_token_usage = {
-                            prompt_tokens = data.usage.prompt_tokens
-                                            or data.usage.input_tokens or 0,
-                            completion_tokens = data.usage.completion_tokens
-                                               or data.usage.output_tokens or 0,
-                            total_tokens = data.usage.total_tokens or 0,
+                            prompt_tokens = pt,
+                            completion_tokens = ct,
+                            total_tokens = data.usage.total_tokens or (pt + ct),
                         }
                         ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens
                         ctx.var.llm_completion_tokens = ctx.ai_token_usage.completion_tokens

From d88f36035a6856f1710cc70260aa4313d6ef3a96 Mon Sep 17 00:00:00 2001
From: iakuf <iakuf@gmail.com>
Date: Thu, 26 Feb 2026 22:37:49 +0800
Subject: [PATCH 3/3] fix(ai-proxy): also compute total_tokens fallback in
 non-streaming path

---
 apisix/plugins/ai-drivers/openai-base.lua | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/apisix/plugins/ai-drivers/openai-base.lua b/apisix/plugins/ai-drivers/openai-base.lua
index 5e23c1c995d6..959c14b85c23 100644
--- a/apisix/plugins/ai-drivers/openai-base.lua
+++ b/apisix/plugins/ai-drivers/openai-base.lua
@@ -194,7 +194,9 @@ local function read_response(conf, ctx, res, response_filter)
                                                or res_body.usage.input_tokens or 0
             ctx.ai_token_usage.completion_tokens = res_body.usage.completion_tokens
                                                    or res_body.usage.output_tokens or 0
-            ctx.ai_token_usage.total_tokens = res_body.usage.total_tokens or 0
+            ctx.ai_token_usage.total_tokens = res_body.usage.total_tokens
+                                              or (ctx.ai_token_usage.prompt_tokens
+                                                 + ctx.ai_token_usage.completion_tokens)
         end
         ctx.var.llm_prompt_tokens = ctx.ai_token_usage.prompt_tokens or 0
         ctx.var.llm_completion_tokens = ctx.ai_token_usage.completion_tokens or 0