diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 3b920181a..636e2d222 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -236,10 +236,19 @@ def validate_and_render_schema(): for routing_preference in model_provider.get("routing_preferences", []): if routing_preference.get("name") in model_usage_name_keys: raise Exception( - f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference" + f'Duplicate routing preference name "{routing_preference.get("name")}", please provide unique name for each routing preference' ) model_usage_name_keys.add(routing_preference.get("name")) + # Warn if both passthrough_auth and access_key are configured + if model_provider.get("passthrough_auth") and model_provider.get( + "access_key" + ): + print( + f"WARNING: Model provider '{model_provider.get('name')}' has both 'passthrough_auth: true' and 'access_key' configured. " + f"The access_key will be ignored and the client's Authorization header will be forwarded instead." 
+ ) + model_provider["model"] = model_id model_provider["provider_interface"] = provider model_provider_name_set.add(model_provider.get("name")) diff --git a/config/arch_config_schema.yaml b/config/arch_config_schema.yaml index 78856adfb..71a4e3e96 100644 --- a/config/arch_config_schema.yaml +++ b/config/arch_config_schema.yaml @@ -1,4 +1,4 @@ -$schema: "http://json-schema.org/draft-07/schema#" +$schema: 'http://json-schema.org/draft-07/schema#' type: object properties: version: @@ -109,12 +109,12 @@ properties: endpoints: type: object patternProperties: - "^[a-zA-Z][a-zA-Z0-9_]*$": + '^[a-zA-Z][a-zA-Z0-9_]*$': type: object properties: endpoint: type: string - pattern: "^.*$" + pattern: '^.*$' connect_timeout: type: string protocol: @@ -143,6 +143,9 @@ properties: type: boolean base_url: type: string + passthrough_auth: + type: boolean + description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys." http_host: type: string provider_interface: @@ -187,6 +190,9 @@ properties: type: boolean base_url: type: string + passthrough_auth: + type: boolean + description: "When true, forwards the client's Authorization header to upstream instead of using the configured access_key. Useful for routing to services like LiteLLM that validate their own virtual keys." http_host: type: string provider_interface: @@ -219,7 +225,7 @@ properties: model_aliases: type: object patternProperties: - "^.*$": + '^.*$': type: object properties: target: diff --git a/config/test_passthrough.yaml b/config/test_passthrough.yaml new file mode 100644 index 000000000..7e59370e9 --- /dev/null +++ b/config/test_passthrough.yaml @@ -0,0 +1,37 @@ +# Test configuration for passthrough_auth feature +# This config demonstrates forwarding client's Authorization header to upstream +# instead of using a configured access_key. 
+# +# Use case: Deploying Plano in front of LiteLLM, OpenRouter, or other LLM proxies +# that manage their own API key validation. +# +# To test: +# docker build -t plano-passthrough-test . +# docker run -d -p 10000:10000 -v $(pwd)/config/test_passthrough.yaml:/app/arch_config.yaml plano-passthrough-test +# +# curl http://localhost:10000/v1/chat/completions \ +# -H "Authorization: Bearer sk-your-virtual-key" \ +# -H "Content-Type: application/json" \ +# -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}' + +version: v0.3.0 + +listeners: + - name: llm + type: model + port: 10000 + +model_providers: + # Passthrough auth example - forwards client's Authorization header + # Replace base_url with your LiteLLM or proxy endpoint + - model: openai/gpt-4o + base_url: 'https://litellm.example.com' + passthrough_auth: true + default: true + + # Example with both passthrough_auth and access_key (access_key will be ignored) + # This configuration will trigger a warning during startup + - model: openai/gpt-4o-mini + base_url: 'https://litellm.example.com' + passthrough_auth: true + access_key: 'this-will-be-ignored' diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 58ea1e3e6..60fd20d04 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -324,6 +324,7 @@ pub struct LlmProvider { pub cluster_name: Option, pub base_url_path_prefix: Option, pub internal: Option, + pub passthrough_auth: Option, } pub trait IntoModels { @@ -367,6 +368,7 @@ impl Default for LlmProvider { cluster_name: None, base_url_path_prefix: None, internal: None, + passthrough_auth: None, } } } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 420a10353..8da0f92af 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -149,6 +149,23 @@ impl StreamContext { } fn modify_auth_headers(&mut self) -> Result<(), 
ServerError> { + if self.llm_provider().passthrough_auth == Some(true) { + // Check if client provided an Authorization header + if self.get_http_request_header("Authorization").is_none() { + warn!( + "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: passthrough_auth enabled but no Authorization header present in client request", + self.request_identifier() + ); + } else { + debug!( + "[PLANO_REQ_ID:{}] AUTH_PASSTHROUGH: preserving client Authorization header for provider '{}'", + self.request_identifier(), + self.llm_provider().name + ); + } + return Ok(()); + } + let llm_provider_api_key_value = self.llm_provider() .access_key @@ -778,16 +795,11 @@ impl HttpContext for StreamContext { //We need to update the upstream path if there is a variation for a provider like Gemini/Groq, etc. self.update_upstream_path(&request_path); - if self.llm_provider().endpoint.is_some() { - self.add_http_request_header( - ARCH_ROUTING_HEADER, - &self - .llm_provider() - .cluster_name - .as_ref() - .unwrap() - .to_string(), - ); + // Clone cluster_name to avoid borrowing self while calling add_http_request_header (which requires mut self) + let cluster_name_opt = self.llm_provider().cluster_name.clone(); + + if let Some(cluster_name) = cluster_name_opt { + self.add_http_request_header(ARCH_ROUTING_HEADER, &cluster_name); } else { self.add_http_request_header( ARCH_ROUTING_HEADER, diff --git a/docs/source/concepts/llm_providers/supported_providers.rst b/docs/source/concepts/llm_providers/supported_providers.rst index acdb8381d..188f35a01 100644 --- a/docs/source/concepts/llm_providers/supported_providers.rst +++ b/docs/source/concepts/llm_providers/supported_providers.rst @@ -728,6 +728,75 @@ Configure routing preferences for dynamic model selection: - name: creative_writing description: creative content generation, storytelling, and writing assistance +.. 
_passthrough_auth: + +Passthrough Authentication +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When deploying Plano in front of LLM proxy services that manage their own API key validation (such as LiteLLM, OpenRouter, or custom gateways), you may want to forward the client's original ``Authorization`` header instead of replacing it with a configured ``access_key``. + +The ``passthrough_auth`` option enables this behavior: + +.. code-block:: yaml + + llm_providers: + # Forward client's Authorization header to LiteLLM + - model: openai/gpt-4o-litellm + base_url: https://litellm.example.com + passthrough_auth: true + default: true + + # Forward to OpenRouter + - model: openai/claude-3-opus + base_url: https://openrouter.ai/api/v1 + passthrough_auth: true + +**How it works:** + +1. Client sends a request with ``Authorization: Bearer <virtual-key>`` +2. Plano preserves this header instead of replacing it with ``access_key`` +3. The upstream service (e.g., LiteLLM) validates the virtual key +4. Response flows back through Plano to the client + +**Use Cases:** + +- **LiteLLM Integration**: Route requests to LiteLLM which manages virtual keys and rate limits +- **OpenRouter**: Forward requests to OpenRouter with per-user API keys +- **Custom API Gateways**: Integrate with internal gateways that have their own authentication +- **Multi-tenant Deployments**: Allow different clients to use their own credentials + +**Important Notes:** + +- When ``passthrough_auth: true`` is set, the ``access_key`` field is ignored (a warning is logged if both are configured) +- If the client doesn't provide an ``Authorization`` header, the request is forwarded without authentication (upstream will likely return 401) +- The ``base_url`` is typically required when using ``passthrough_auth`` + +**Configuration with LiteLLM example:** + +.. 
code-block:: yaml + + # plano_config.yaml + version: v0.3.0 + + listeners: + - name: llm + type: model + port: 10000 + + model_providers: + - model: openai/gpt-4o + base_url: https://litellm.example.com + passthrough_auth: true + default: true + +.. code-block:: bash + + # Client request - virtual key is forwarded to upstream + curl http://localhost:10000/v1/chat/completions \ + -H "Authorization: Bearer sk-litellm-virtual-key-abc123" \ + -H "Content-Type: application/json" \ + -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}]}' + Model Selection Guidelines -------------------------- diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index aa186c267..be3c18a21 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -1,26 +1,22 @@ - # Arch Gateway configuration version version: v0.3.0 - # External HTTP agents - API type is controlled by request path (/v1/responses, /v1/messages, /v1/chat/completions) agents: - - id: weather_agent # Example agent for weather + - id: weather_agent # Example agent for weather url: http://host.docker.internal:10510 - - id: flight_agent # Example agent for flights + - id: flight_agent # Example agent for flights url: http://host.docker.internal:10520 - # MCP filters applied to requests/responses (e.g., input validation, query rewriting) filters: - - id: input_guards # Example filter for input validation + - id: input_guards # Example filter for input validation url: http://host.docker.internal:10500 # type: mcp (default) # transport: streamable-http (default) # tool: input_guards (default - same as filter id) - # LLM provider configurations with API keys and model routing model_providers: - model: openai/gpt-4o @@ -36,6 +32,12 @@ model_providers: - model: mistral/ministral-3b-latest access_key: $MISTRAL_API_KEY + # Example: 
Passthrough authentication for LiteLLM or similar proxies + # When passthrough_auth is true, client's Authorization header is forwarded + # instead of using the configured access_key + - model: openai/gpt-4o-litellm + base_url: https://litellm.example.com + passthrough_auth: true # Model aliases - use friendly names instead of full provider model names model_aliases: @@ -45,7 +47,6 @@ model_aliases: smart-llm: target: gpt-4o - # HTTP listeners - entry points for agent routing, prompt targets, and direct LLM access listeners: # Agent listener for routing requests to multiple agents @@ -73,7 +74,6 @@ listeners: port: 10000 # This listener is used for prompt_targets and function calling - # Reusable service endpoints endpoints: app_server: @@ -83,7 +83,6 @@ endpoints: mistral_local: endpoint: 127.0.0.1:8001 - # Prompt targets for function calling and API orchestration prompt_targets: - name: get_current_weather @@ -103,7 +102,6 @@ prompt_targets: path: /weather http_method: POST - # OpenTelemetry tracing configuration tracing: # Random sampling percentage (1-100) diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index 4ba89a926..a33878b64 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -64,6 +64,15 @@ listeners: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral + - base_url: https://litellm.example.com + cluster_name: openai_litellm.example.com + endpoint: litellm.example.com + model: gpt-4o-litellm + name: openai/gpt-4o-litellm + passthrough_auth: true + port: 443 + protocol: https + provider_interface: openai name: egress_traffic port: 12000 timeout: 30s @@ -91,6 +100,15 @@ model_providers: model: ministral-3b-latest name: mistral/ministral-3b-latest provider_interface: mistral +- base_url: 
https://litellm.example.com + cluster_name: openai_litellm.example.com + endpoint: litellm.example.com + model: gpt-4o-litellm + name: openai/gpt-4o-litellm + passthrough_auth: true + port: 443 + protocol: https + provider_interface: openai - internal: true model: Arch-Function name: arch-function