11 changes: 10 additions & 1 deletion cli/planoai/config_generator.py
@@ -236,10 +236,19 @@ def validate_and_render_schema():
for routing_preference in model_provider.get("routing_preferences", []):
if routing_preference.get("name") in model_usage_name_keys:
raise Exception(
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference'
)
model_usage_name_keys.add(routing_preference.get("name"))

# Warn if both passthrough_auth and access_key are configured
if model_provider.get("passthrough_auth") and model_provider.get(
"access_key"
):
print(
f"WARNING: Model provider '{model_provider.get('name')}' has both 'passthrough_auth: true' and 'access_key' configured. "
f"The access_key will be ignored and the client's Authorization header will be forwarded instead."
)

model_provider["model"] = model_id
model_provider["provider_interface"] = provider
model_provider_name_set.add(model_provider.get("name"))
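The two checks above are easy to read in isolation. Below is a minimal standalone Python sketch of the same logic; `check_providers` and its `providers` argument are illustrative names, not part of the actual CLI:

    # Standalone sketch of the validations added above (illustrative helper,
    # not the actual CLI code; `providers` is a list of provider dicts).
    def check_providers(providers):
        seen_preference_names = set()
        for provider in providers:
            # Routing preference names must be unique.
            for pref in provider.get("routing_preferences", []):
                name = pref.get("name")
                if name in seen_preference_names:
                    raise ValueError(f'Duplicate routing preference name "{name}"')
                seen_preference_names.add(name)
            # passthrough_auth takes precedence, so flag a conflicting access_key.
            if provider.get("passthrough_auth") and provider.get("access_key"):
                print(
                    f"WARNING: provider {provider.get('name')!r} sets both "
                    "'passthrough_auth: true' and 'access_key'; access_key is ignored."
                )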
14 changes: 10 additions & 4 deletions config/arch_config_schema.yaml
@@ -1,4 +1,4 @@
$schema: "http://json-schema.org/draft-07/schema#"
$schema: 'http://json-schema.org/draft-07/schema#'
type: object
properties:
version:
@@ -109,12 +109,12 @@ properties:
endpoints:
type: object
patternProperties:
"^[a-zA-Z][a-zA-Z0-9_]*$":
'^[a-zA-Z][a-zA-Z0-9_]*$':
type: object
properties:
endpoint:
type: string
pattern: "^.*$"
pattern: '^.*$'
connect_timeout:
type: string
protocol:
@@ -143,6 +143,9 @@ properties:
type: boolean
base_url:
type: string
passthrough_auth:
type: boolean
description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
http_host:
type: string
provider_interface:
@@ -187,6 +190,9 @@ properties:
type: boolean
base_url:
type: string
passthrough_auth:
type: boolean
description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys."
http_host:
type: string
provider_interface:
@@ -219,7 +225,7 @@ properties:
model_aliases:
type: object
patternProperties:
"^.*$":
'^.*$':
type: object
properties:
target:
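Because this is plain JSON Schema (draft-07), the new `passthrough_auth` field can be checked locally before deploying. A quick sketch, assuming the repository layout shown in this diff and the `pyyaml` and `jsonschema` packages:

    # Validate a config against the schema above (sketch; file paths assume
    # the repository layout shown in this diff).
    import yaml
    from jsonschema import validate

    with open("config/arch_config_schema.yaml") as f:
        schema = yaml.safe_load(f)
    with open("config/test_passthrough.yaml") as f:
        config = yaml.safe_load(f)

    # Raises jsonschema.ValidationError if, e.g., passthrough_auth is not a boolean.
    validate(instance=config, schema=schema)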
37 changes: 37 additions & 0 deletions config/test_passthrough.yaml
@@ -0,0 +1,37 @@
# Test configuration for the passthrough_auth feature
# This config demonstrates forwarding the client's Authorization header to upstream
# instead of using a configured access_key.
#
# Use case: Deploying Plano in front of LiteLLM, OpenRouter, or other LLM proxies
# that manage their own API key validation.
#
# To test:
# docker build -t plano-passthrough-test .
# docker run -d -p 10000:10000 -v $(pwd)/config/test_passthrough.yaml:/app/arch_config.yaml plano-passthrough-test
#
# curl http://localhost:10000/v1/chat/completions \
# -H "Authorization: Bearer sk-your-virtual-key" \
# -H "Content-Type: application/json" \
# -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}'

version: v0.3.0

listeners:
- name: llm
type: model
port: 10000

model_providers:
# Passthrough auth example - forwards client's Authorization header
# Replace base_url with your LiteLLM or proxy endpoint
- model: openai/gpt-4o
base_url: 'https://litellm.example.com'
passthrough_auth: true
default: true

# Example with both passthrough_auth and access_key (access_key will be ignored)
# This configuration will trigger a warning during startup
- model: openai/gpt-4o-mini
base_url: 'https://litellm.example.com'
passthrough_auth: true
access_key: 'this-will-be-ignored'
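The curl command in the header comment can also be expressed as a short Python script, which is handier for scripted smoke tests. This assumes the gateway built from the config above is listening on localhost:10000 and the `requests` package is installed:

    # Smoke test: the Authorization header below should be forwarded verbatim
    # to the upstream proxy (e.g. LiteLLM), not replaced by an access_key.
    import requests

    resp = requests.post(
        "http://localhost:10000/v1/chat/completions",
        headers={"Authorization": "Bearer sk-your-virtual-key"},
        json={"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]},
        timeout=30,
    )
    print(resp.status_code)
    print(resp.json())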
2 changes: 2 additions & 0 deletions crates/common/src/configuration.rs
@@ -324,6 +324,7 @@ pub struct LlmProvider {
pub cluster_name: Option<String>,
pub base_url_path_prefix: Option<String>,
pub internal: Option<bool>,
pub passthrough_auth: Option<bool>,
}

pub trait IntoModels {
@@ -367,6 +368,7 @@ impl Default for LlmProvider {
cluster_name: None,
base_url_path_prefix: None,
internal: None,
passthrough_auth: None,
}
}
}
32 changes: 22 additions & 10 deletions crates/llm_gateway/src/stream_context.rs
@@ -149,6 +149,23 @@ impl StreamContext {
}

fn modify_auth_headers(&mut self) -> Result<(), ServerError> {
if self.llm_provider().passthrough_auth == Some(true) {
// Check if client provided an Authorization header
if self.get_http_request_header("Authorization").is_none() {
warn!(
"[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: passthrough_auth enabled but no Authorization header present in client request",
self.request_identifier()
);
} else {
debug!(
"[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: preserving client Authorization header for provider '{}'",
self.request_identifier(),
self.llm_provider().name
);
}
return Ok(());
}

let llm_provider_api_key_value =
self.llm_provider()
.access_key
@@ -778,16 +795,11 @@ impl HttpContext for StreamContext {
// We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc.
self.update_upstream_path(&request_path);

-if self.llm_provider().endpoint.is_some() {
-    self.add_http_request_header(
-        ARCH_ROUTING_HEADER,
-        &self
-            .llm_provider()
-            .cluster_name
-            .as_ref()
-            .unwrap()
-            .to_string(),
-    );
+// Clone cluster_name to avoid borrowing self while calling add_http_request_header (which requires mut self)
+let cluster_name_opt = self.llm_provider().cluster_name.clone();
+
+if let Some(cluster_name) = cluster_name_opt {
+    self.add_http_request_header(ARCH_ROUTING_HEADER, &cluster_name);
} else {
self.add_http_request_header(
ARCH_ROUTING_HEADER,
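For readers skimming the diff, the header decision in `modify_auth_headers` boils down to one branch. A Python restatement, illustrative only — the real logic lives in the Rust above, and the Bearer-formatting of `access_key` in the fall-through path is assumed from the truncated context:

    # Illustrative restatement of the auth-header decision (not gateway code).
    def resolve_auth_header(provider: dict, client_authorization: str | None) -> str | None:
        if provider.get("passthrough_auth"):
            # Preserve whatever the client sent; if it sent nothing, the
            # request goes upstream unauthenticated (the gateway logs a warning).
            return client_authorization
        # Fall-through path: use the configured access_key (assumed to be
        # formatted as a Bearer token, per the truncated code above).
        access_key = provider.get("access_key")
        return f"Bearer {access_key}" if access_key is not None else None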
69 changes: 69 additions & 0 deletions docs/source/concepts/llm_providers/supported_providers.rst
@@ -728,6 +728,75 @@ Configure routing preferences for dynamic model selection:
- name: creative_writing
description: creative content generation, storytelling, and writing assistance

.. _passthrough_auth:

Passthrough Authentication
~~~~~~~~~~~~~~~~~~~~~~~~~~

When deploying Plano in front of LLM proxy services that manage their own API key validation (such as LiteLLM, OpenRouter, or custom gateways), you may want to forward the client's original ``Authorization`` header instead of replacing it with a configured ``access_key``.

The ``passthrough_auth`` option enables this behavior:

.. code-block:: yaml

llm_providers:
# Forward client's Authorization header to LiteLLM
- model: openai/gpt-4o-litellm
base_url: https://litellm.example.com
passthrough_auth: true
default: true

# Forward to OpenRouter
- model: openai/claude-3-opus
base_url: https://openrouter.ai/api/v1
passthrough_auth: true

**How it works:**

1. Client sends a request with ``Authorization: Bearer <virtual-key>``
2. Plano preserves this header instead of replacing it with ``access_key``
3. The upstream service (e.g., LiteLLM) validates the virtual key
4. Response flows back through Plano to the client

**Use Cases:**

- **LiteLLM Integration**: Route requests to LiteLLM which manages virtual keys and rate limits
- **OpenRouter**: Forward requests to OpenRouter with per-user API keys
- **Custom API Gateways**: Integrate with internal gateways that have their own authentication
- **Multi-tenant Deployments**: Allow different clients to use their own credentials

**Important Notes:**

- When ``passthrough_auth: true`` is set, the ``access_key`` field is ignored (a warning is logged if both are configured)
- If the client doesn't provide an ``Authorization`` header, the request is forwarded without authentication (upstream will likely return 401)
- ``base_url`` usually needs to be set explicitly when using ``passthrough_auth``, since the target is a proxy rather than a provider with a well-known default endpoint

**Example configuration with LiteLLM:**

.. code-block:: yaml

# plano_config.yaml
version: v0.3.0

listeners:
- name: llm
type: model
port: 10000

model_providers:
- model: openai/gpt-4o
base_url: https://litellm.example.com
passthrough_auth: true
default: true

.. code-block:: bash

# Client request - virtual key is forwarded to upstream
curl http://localhost:10000/v1/chat/completions \
-H "Authorization: Bearer sk-litellm-virtual-key-abc123" \
-H "Content-Type: application/json" \
-d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}'

Model Selection Guidelines
--------------------------

20 changes: 9 additions & 11 deletions docs/source/resources/includes/arch_config_full_reference.yaml
@@ -1,26 +1,22 @@

# Arch Gateway configuration version
version: v0.3.0


# External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions)
agents:
- id: weather_agent # Example agent for weather
url: http://host.docker.internal:10510

- id: flight_agent # Example agent for flights
url: http://host.docker.internal:10520


# MCP filters applied to requests/responses (e.g., input validation, query rewriting)
filters:
- id: input_guards # Example filter for input validation
url: http://host.docker.internal:10500
# type: mcp (default)
# transport: streamable-http (default)
# tool: input_guards (default - same as filter id)


# LLM provider configurations with API keys and model routing
model_providers:
- model: openai/gpt-4o
@@ -36,6 +32,12 @@ model_providers:
- model: mistral/ministral-3b-latest
access_key: $MISTRAL_API_KEY

# Example: Passthrough authentication for LiteLLM or similar proxies
# When passthrough_auth is true, client's Authorization header is forwarded
# instead of using the configured access_key
- model: openai/gpt-4o-litellm
base_url: https://litellm.example.com
passthrough_auth: true

# Model aliases - use friendly names instead of full provider model names
model_aliases:
@@ -45,7 +47,6 @@ model_aliases:
smart-llm:
target: gpt-4o


# HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access
listeners:
# Agent listener for routing requests to multiple agents
@@ -73,7 +74,6 @@ listeners:
port: 10000
# This listener is used for prompt_targets and function calling


# Reusable service endpoints
endpoints:
app_server:
@@ -83,7 +83,6 @@ endpoints:
mistral_local:
endpoint: 127.0.0.1:8001


# Prompt targets for function calling and API orchestration
prompt_targets:
- name: get_current_weather
@@ -103,7 +102,6 @@ prompt_targets:
path: /weather
http_method: POST


# OpenTelemetry tracing configuration
tracing:
# Random sampling percentage (1-100)
@@ -64,6 +64,15 @@ listeners:
model: ministral-3b-latest
name: mistral/ministral-3b-latest
provider_interface: mistral
- base_url: https://litellm.example.com
cluster_name: openai_litellm.example.com
endpoint: litellm.example.com
model: gpt-4o-litellm
name: openai/gpt-4o-litellm
passthrough_auth: true
port: 443
protocol: https
provider_interface: openai
name: egress_traffic
port: 12000
timeout: 30s
@@ -91,6 +100,15 @@ model_providers:
model: ministral-3b-latest
name: mistral/ministral-3b-latest
provider_interface: mistral
- base_url: https://litellm.example.com
cluster_name: openai_litellm.example.com
endpoint: litellm.example.com
model: gpt-4o-litellm
name: openai/gpt-4o-litellm
passthrough_auth: true
port: 443
protocol: https
provider_interface: openai
- internal: true
model: Arch-Function
name: arch-function