Commit: new version

AntoineJac committed Jun 17, 2024
1 parent ce19b11 commit e88453b
Showing 7 changed files with 77 additions and 53 deletions.
14 changes: 9 additions & 5 deletions kong/llm/drivers/shared.lua
@@ -44,8 +44,10 @@ local log_entry_keys = {
COST_REQUEST = "cost_request",

-- cache keys
- DB_NAME = "db_name",
- CACHE_TYPE = "cache_type",
+ VECTOR_DB = "vector_db",
+ EMBEDDINGS_PROVIDER = "embeddings_provider",
+ EMBEDDINGS_MODEL = "embeddings_model",
+ CACHE_STATUS = "cache_status",
}

local openai_override = os.getenv("OPENAI_TEST_PORT")
@@ -487,8 +489,10 @@ function _M.post_request(conf, response_object)
[log_entry_keys.COST_REQUEST] = 0,
},
[log_entry_keys.CACHE_CONTAINER] = {
- [log_entry_keys.DB_NAME] = "",
- [log_entry_keys.CACHE_TYPE] = "not_cached",
+ [log_entry_keys.VECTOR_DB] = "",
+ [log_entry_keys.EMBEDDINGS_PROVIDER] = "",
+ [log_entry_keys.EMBEDDINGS_MODEL] = "",
+ [log_entry_keys.CACHE_STATUS] = "",
},
}

@@ -521,7 +525,7 @@ function _M.post_request(conf, response_object)
and conf.model.options.input_cost and conf.model.options.output_cost then
request_analytics_plugin[log_entry_keys.TOKENS_CONTAINER][log_entry_keys.COST_REQUEST] =
(response_object.usage.prompt_tokens * conf.model.options.input_cost
- + response_object.usage.completion_tokens * conf.model.options.output_cost) / 1000
+ + response_object.usage.completion_tokens * conf.model.options.output_cost) / 1000000 -- 1 million
end
end
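As a sanity check on the new divisor, here is a minimal standalone Lua sketch (not part of the commit) of the per-1M-token cost arithmetic, using the mock usage (25 prompt tokens, 12 completion tokens) and the 10.0 input/output costs configured in the updated specs below:

-- Standalone sketch of the per-1M-token cost formula used above.
local prompt_tokens, completion_tokens = 25, 12   -- usage reported by the mock upstream in the specs
local input_cost, output_cost = 10.0, 10.0        -- cost per 1 million tokens
local cost_request = (prompt_tokens * input_cost
  + completion_tokens * output_cost) / 1000000
print(cost_request)  -- 0.00037, the value asserted for ai_llm_cost_total in 02-access_spec.lua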

8 changes: 4 additions & 4 deletions kong/llm/schemas/init.lua
@@ -51,14 +51,14 @@ local model_options_schema = {
default = 256 }},
{ input_cost = {
type = "number",
- description = "Defines the cost per 1000 tokens in your prompt.",
+ description = "Defines the cost per 1M tokens in your prompt.",
required = false,
- between = { 0.0, 0.1 }}},
+ gt = 0}},
{ output_cost = {
type = "number",
- description = "Defines the cost per 1000 tokens in the output of the AI.",
+ description = "Defines the cost per 1M tokens in the output of the AI.",
required = false,
- between = { 0.0, 0.1 }}},
+ gt = 0}},
{ temperature = {
type = "number",
description = "Defines the matching temperature, if using chat or completion models.",
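For illustration, a hedged sketch of how the updated cost options might look on an ai-proxy model block after this change; the shape and values mirror the test configuration further down and are not taken verbatim from any config file in the commit:

-- Illustrative sketch (assumed shape): the model block of an ai-proxy plugin config,
-- mirroring the values used in the updated specs below.
local model = {
  provider = "openai",
  name = "gpt-3.5-turbo",
  options = {
    max_tokens = 256,
    temperature = 1.0,
    input_cost = 10.0,   -- cost per 1M prompt tokens (previously interpreted as per 1K)
    output_cost = 10.0,  -- cost per 1M completion tokens
  },
}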
50 changes: 32 additions & 18 deletions kong/plugins/prometheus/exporter.lua
@@ -151,15 +151,15 @@ local function init()
if ai_request then
metrics.ai_llm_requests = prometheus:counter("ai_llm_requests_total",
"AI requests total per ai_provider in Kong",
{"ai_provider", "ai_model", "cache", "db_name", "workspace"})
{"ai_provider", "ai_model", "cache", "vector_db", "embeddings_provider", "embeddings_model", "workspace"})

metrics.ai_llm_cost = prometheus:counter("ai_llm_cost_total",
"AI requests cost per ai_provider/cache in Kong",
{"ai_provider", "ai_model", "cache", "db_name", "workspace"})
{"ai_provider", "ai_model", "cache", "vector_db", "embeddings_provider", "embeddings_model", "workspace"})

metrics.ai_llm_tokens = prometheus:counter("ai_llm_tokens_total",
"AI requests cost per ai_provider/cache in Kong",
{"ai_provider", "ai_model", "cache", "db_name", "token_type", "workspace"})
{"ai_provider", "ai_model", "cache", "vector_db", "embeddings_provider", "embeddings_model", "token_type", "workspace"})
end

-- Hybrid mode status
@@ -225,8 +225,8 @@ local upstream_target_addr_health_table = {
{ value = 0, labels = { 0, 0, 0, "dns_error", ngx.config.subsystem } },
}
-- ai
- local labels_table_ai_llm_status = {0, 0, 0, 0, 0}
- local labels_table_ai_llm_tokens = {0, 0, 0, 0, 0, 0}
+ local labels_table_ai_llm_status = {0, 0, 0, 0, 0, 0, 0}
+ local labels_table_ai_llm_tokens = {0, 0, 0, 0, 0, 0, 0, 0}

local function set_healthiness_metrics(table, upstream, target, address, status, metrics_bucket)
for i = 1, #table do
@@ -336,21 +336,33 @@ local function log(message, serialized)

if serialized.ai_metrics then
for _, ai_plugin in pairs(serialized.ai_metrics) do
- local cache_type
- if ai_plugin.cache and ai_plugin.cache.cache_type then
- cache_type = ai_plugin.cache.cache_type
+ local cache_status
+ if ai_plugin.cache and ai_plugin.cache.cache_status then
+ cache_status = ai_plugin.cache.cache_status
end

- local db_name
- if ai_plugin.cache and ai_plugin.cache.db_name then
- db_name = ai_plugin.cache.db_name
+ local vector_db, embeddings_provider, embeddings_model
+ if ai_plugin.cache then
+ if ai_plugin.cache.vector_db then
+ vector_db = ai_plugin.cache.vector_db
+ end
+
+ if ai_plugin.cache.embeddings_provider then
+ embeddings_provider = ai_plugin.cache.embeddings_provider
+ end
+
+ if ai_plugin.cache.embeddings_model then
+ embeddings_model = ai_plugin.cache.embeddings_model
+ end
end

labels_table_ai_llm_status[1] = ai_plugin.meta.provider_name
labels_table_ai_llm_status[2] = ai_plugin.meta.request_model
- labels_table_ai_llm_status[3] = cache_type
- labels_table_ai_llm_status[4] = db_name
- labels_table_ai_llm_status[5] = workspace
+ labels_table_ai_llm_status[3] = cache_status
+ labels_table_ai_llm_status[4] = vector_db
+ labels_table_ai_llm_status[5] = embeddings_provider
+ labels_table_ai_llm_status[6] = embeddings_model
+ labels_table_ai_llm_status[7] = workspace
metrics.ai_llm_requests:inc(1, labels_table_ai_llm_status)

if ai_plugin.usage.cost_request and ai_plugin.usage.cost_request > 0 then
@@ -359,12 +371,14 @@

labels_table_ai_llm_tokens[1] = ai_plugin.meta.provider_name
labels_table_ai_llm_tokens[2] = ai_plugin.meta.request_model
- labels_table_ai_llm_tokens[3] = cache_type
- labels_table_ai_llm_tokens[4] = db_name
- labels_table_ai_llm_tokens[6] = workspace
+ labels_table_ai_llm_tokens[3] = cache_status
+ labels_table_ai_llm_tokens[4] = vector_db
+ labels_table_ai_llm_tokens[5] = embeddings_provider
+ labels_table_ai_llm_tokens[6] = embeddings_model
+ labels_table_ai_llm_tokens[8] = workspace

if ai_plugin.usage.prompt_tokens and ai_plugin.usage.prompt_tokens > 0 then
- labels_table_ai_llm_tokens[5] = "prompt_tokens"
+ labels_table_ai_llm_tokens[7] = "prompt_tokens"
metrics.ai_llm_tokens:inc(ai_plugin.usage.prompt_tokens, labels_table_ai_llm_tokens)
end
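For reference, a rough Lua sketch of the cache sub-table this loop reads from serialized.ai_metrics; the shape is an assumption inferred from the code above, and the empty-string values are the not-cached defaults seen in the _EXPECTED_CHAT_STATS fixtures in the spec files below:

-- Assumed shape of ai_plugin.cache consumed above (not-cached defaults shown).
local cache = {
  cache_status = "",
  vector_db = "",
  embeddings_provider = "",
  embeddings_model = "",
}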

28 changes: 14 additions & 14 deletions spec/03-plugins/26-prometheus/02-access_spec.lua
@@ -702,8 +702,8 @@ describe("Plugin: prometheus (access) AI metrics", function()
max_tokens = 256,
temperature = 1.0,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/good",
- input_cost = 0.01,
- output_cost = 0.01,
+ input_cost = 10.0,
+ output_cost = 10.0,
},
},
},
@@ -764,13 +764,13 @@ describe("Plugin: prometheus (access) AI metrics", function()
assert.matches('kong_nginx_metric_errors_total 0', body, nil, true)
assert.matches('http_requests_total{service="empty_service",route="http-route",code="200",source="service",workspace="default",consumer=""} 1', body, nil, true)

- assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 1', body, nil, true)
+ assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 1', body, nil, true)

- assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 0.00037', body, nil, true)
+ assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00037', body, nil, true)

- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="completion_tokens",workspace="default"} 12', body, nil, true)
- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="prompt_tokens",workspace="default"} 25', body, nil, true)
- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="total_tokens",workspace="default"} 37', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="completion_tokens",workspace="default"} 12', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="prompt_tokens",workspace="default"} 25', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="total_tokens",workspace="default"} 37', body, nil, true)
end)

it("increments the count for proxied AI requests", function()
@@ -800,13 +800,13 @@ describe("Plugin: prometheus (access) AI metrics", function()
assert.matches('kong_nginx_metric_errors_total 0', body, nil, true)
assert.matches('http_requests_total{service="empty_service",route="http-route",code="200",source="service",workspace="default",consumer=""} 2', body, nil, true)

- assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 2', body, nil, true)
+ assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 2', body, nil, true)

- assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 0.00074', body, nil, true)
+ assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00074', body, nil, true)

- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="completion_tokens",workspace="default"} 24', body, nil, true)
- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="prompt_tokens",workspace="default"} 50', body, nil, true)
- assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",token_type="total_tokens",workspace="default"} 74', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="completion_tokens",workspace="default"} 24', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="prompt_tokens",workspace="default"} 50', body, nil, true)
+ assert.matches('ai_llm_tokens_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",token_type="total_tokens",workspace="default"} 74', body, nil, true)
end)

it("behave correctly if AI metrics are not found", function()
@@ -832,7 +832,7 @@
assert.matches('http_requests_total{service="empty_service",route="http-route",code="400",source="kong",workspace="default",consumer=""} 1', body, nil, true)
assert.matches('kong_nginx_metric_errors_total 0', body, nil, true)

- assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 2', body, nil, true)
- assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",db_name="",workspace="default"} 0.00074', body, nil, true)
+ assert.matches('ai_llm_requests_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 2', body, nil, true)
+ assert.matches('ai_llm_cost_total{ai_provider="openai",ai_model="gpt-3.5-turbo",cache="not_cached",vector_db="",embeddings_provider="",embeddings_model="",workspace="default"} 0.00074', body, nil, true)
end)
end)
10 changes: 6 additions & 4 deletions spec/03-plugins/38-ai-proxy/02-openai_integration_spec.lua
@@ -49,8 +49,10 @@ local _EXPECTED_CHAT_STATS = {
total_tokens = 37,
},
cache = {
- cache_type = 'not_cached',
- db_name = '',
+ cache_status = '',
+ vector_db = '',
+ embeddings_provider = '',
+ embeddings_model = '',
}
},
}
@@ -253,8 +255,8 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
max_tokens = 256,
temperature = 1.0,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/good",
- input_cost = 0.01,
- output_cost = 0.01,
+ input_cost = 10.0,
+ output_cost = 10.0,
},
},
},
@@ -43,8 +43,8 @@ local OPENAI_FLAT_RESPONSE = {
max_tokens = 512,
temperature = 0.5,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat",
- input_cost = 0.01,
- output_cost = 0.01,
+ input_cost = 10.0,
+ output_cost = 10.0,
},
},
auth = {
@@ -131,8 +131,10 @@ local _EXPECTED_CHAT_STATS = {
total_tokens = 37,
},
cache = {
- cache_type = 'not_cached',
- db_name = '',
+ cache_status = '',
+ vector_db = '',
+ embeddings_provider = '',
+ embeddings_model = '',
}
},
}
@@ -60,8 +60,8 @@ local OPENAI_FLAT_RESPONSE = {
max_tokens = 512,
temperature = 0.5,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/flat",
- input_cost = 0.01,
- output_cost = 0.01,
+ input_cost = 10.0,
+ output_cost = 10.0,
},
},
auth = {
@@ -188,8 +188,10 @@ local _EXPECTED_CHAT_STATS = {
total_tokens = 37,
},
cache = {
- cache_type = 'not_cached',
- db_name = '',
+ cache_status = '',
+ vector_db = '',
+ embeddings_provider = '',
+ embeddings_model = '',
}
},
}