diff --git a/integrations/anthropic-sdk.mdx b/integrations/anthropic-sdk.mdx index 8c719c3..b4c393b 100644 --- a/integrations/anthropic-sdk.mdx +++ b/integrations/anthropic-sdk.mdx @@ -44,11 +44,11 @@ The Anthropic SDK provides official Python and TypeScript clients for interactin ] ) - print(message.content[0].text) + print(message.content) - # Access token usage and cost metrics - print(f"Tokens saved: {message.usage.input_tokens_original - message.usage.input_tokens}") - print(f"Total tokens: {message.usage.input_tokens + message.usage.output_tokens}") + # Access token usage + print(f"Input tokens: {message.usage.input_tokens}") + print(f"Output tokens: {message.usage.output_tokens}") ``` @@ -71,11 +71,11 @@ The Anthropic SDK provides official Python and TypeScript clients for interactin ] }); - console.log(message.content[0].text); + console.log(message.content); - // Access token usage and cost metrics - console.log(`Tokens saved: ${message.usage.input_tokens_original - message.usage.input_tokens}`); - console.log(`Total tokens: ${message.usage.input_tokens + message.usage.output_tokens}`); + // Access token usage + console.log(`Input tokens: ${message.usage.input_tokens}`); + console.log(`Output tokens: ${message.usage.output_tokens}`); ``` @@ -136,9 +136,9 @@ Stream responses for real-time token delivery: -## Cost Tracking & Compression +## Token Usage Tracking -Every Edgee response includes token compression metrics in a dedicated `compression` field: +Access standard Anthropic token usage metrics in every response: @@ -146,7 +146,7 @@ Every Edgee response includes token compression metrics in a dedicated `compress from anthropic import Anthropic client = Anthropic( - base_url="https://api.edgee.ai/v1", + base_url="https://api.edgee.ai", api_key=os.environ.get("EDGEE_API_KEY"), ) @@ -156,15 +156,9 @@ Every Edgee response includes token compression metrics in a dedicated `compress messages=[{"role": "user", "content": "Analyze this long document..."}] ) - 
print(message.content[0].text) - - # Compression metrics (if compression was applied) - if hasattr(message, 'compression') and message.compression: - compression = message.compression - print(f"Original input tokens: {compression.input_tokens}") - print(f"Compressed input tokens: {message.usage.input_tokens}") - print(f"Tokens saved: {compression.saved_tokens}") - print(f"Compression rate: {compression.rate * 100:.1f}%") + print(message.content) + print(f"Input tokens: {message.usage.input_tokens}") + print(f"Output tokens: {message.usage.output_tokens}") ``` @@ -173,7 +167,7 @@ Every Edgee response includes token compression metrics in a dedicated `compress import Anthropic from '@anthropic-ai/sdk'; const client = new Anthropic({ - baseURL: 'https://api.edgee.ai/v1', + baseURL: 'https://api.edgee.ai', apiKey: process.env.EDGEE_API_KEY, }); @@ -183,24 +177,119 @@ Every Edgee response includes token compression metrics in a dedicated `compress messages: [{ role: 'user', content: 'Analyze this long document...' }] }); - console.log(message.content[0].text); - - // Compression metrics (if compression was applied) - if (message.compression) { - const compression = message.compression; - console.log(`Original input tokens: ${compression.input_tokens}`); - console.log(`Compressed input tokens: ${message.usage.input_tokens}`); - console.log(`Tokens saved: ${compression.saved_tokens}`); - console.log(`Compression rate: ${(compression.rate * 100).toFixed(1)}%`); - } + console.log(message.content); + console.log(`Input tokens: ${message.usage.input_tokens}`); + console.log(`Output tokens: ${message.usage.output_tokens}`); ``` - Edgee extends the Anthropic API response with a `compression` field containing compression metrics (`input_tokens`, `saved_tokens`, `rate`). All standard Anthropic fields remain unchanged. + When compression is enabled, `input_tokens` reflects the compressed token count. 
View detailed compression metrics in the [Edgee dashboard](/features/observability). +## Compression & Tags via Headers + +When using the Anthropic SDK with Edgee, you can control token compression and add tags using HTTP headers: + +### Enabling Compression + + + + ```python + from anthropic import Anthropic + + client = Anthropic( + base_url="https://api.edgee.ai", + api_key=os.environ.get("EDGEE_API_KEY"), + default_headers={ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", # Target 80% compression (0.0-1.0) + } + ) + + # All requests will use compression with 80% target rate + message = client.messages.create( + model="claude-sonnet-4.5", + max_tokens=1024, + messages=[{"role": "user", "content": "Analyze this document..."}] + ) + ``` + + + + ```typescript + import Anthropic from '@anthropic-ai/sdk'; + + const client = new Anthropic({ + baseURL: 'https://api.edgee.ai', + apiKey: process.env.EDGEE_API_KEY, + defaultHeaders: { + 'x-edgee-enable-compression': 'true', + 'x-edgee-compression-rate': '0.8', // Target 80% compression (0.0-1.0) + } + }); + + // All requests will use compression with 80% target rate + const message = await client.messages.create({ + model: 'claude-sonnet-4.5', + max_tokens: 1024, + messages: [{ role: 'user', content: 'Analyze this document...' 
}] + }); + ``` + + + +### Adding Tags for Analytics + +Combine compression with tags to track requests in your dashboard: + + + + ```python + from anthropic import Anthropic + + client = Anthropic( + base_url="https://api.edgee.ai", + api_key=os.environ.get("EDGEE_API_KEY"), + default_headers={ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", + "x-edgee-tags": "production,anthropic-sdk,user-123" + } + ) + ``` + + + + ```typescript + import Anthropic from '@anthropic-ai/sdk'; + + const client = new Anthropic({ + baseURL: 'https://api.edgee.ai', + apiKey: process.env.EDGEE_API_KEY, + defaultHeaders: { + 'x-edgee-enable-compression': 'true', + 'x-edgee-compression-rate': '0.8', + 'x-edgee-tags': 'production,anthropic-sdk,user-123' + } + }); + ``` + + + +**Available Headers:** + +| Header | Type | Description | +|--------|------|-------------| +| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable token compression for requests (overrides console settings) | +| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) | +| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering | + + + You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings for specific requests. 
+ + ## Multi-Provider Access With Edgee, you can access models from multiple providers using the same Anthropic SDK client and compare costs across providers: @@ -359,58 +448,6 @@ Use Claude's tool calling with Edgee: -## Tags for Observability - -Add custom tags to track and filter requests in Edgee's dashboard: - - - - ```python - from anthropic import Anthropic - - client = Anthropic( - base_url="https://api.edgee.ai", - api_key=os.environ.get("EDGEE_API_KEY"), - default_headers={ - "x-edgee-tags": "production,anthropic-sdk,user-123" - } - ) - - # All requests from this client will include these tags - message = client.messages.create( - model="claude-sonnet-4.5", - max_tokens=1024, - messages=[{"role": "user", "content": "Hello!"}] - ) - ``` - - - - ```typescript - import Anthropic from '@anthropic-ai/sdk'; - - const client = new Anthropic({ - baseURL: 'https://api.edgee.ai', - apiKey: process.env.EDGEE_API_KEY, - defaultHeaders: { - 'x-edgee-tags': 'production,anthropic-sdk,user-123' - } - }); - - // All requests from this client will include these tags - const message = await client.messages.create({ - model: 'claude-sonnet-4.5', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' }] - }); - ``` - - - - - Tags are comma-separated strings that help you categorize and filter requests in Edgee's analytics dashboard. - - ## Error Handling and Retries The Anthropic SDK includes built-in retry logic, which works seamlessly with Edgee's automatic failover: @@ -432,7 +469,7 @@ The Anthropic SDK includes built-in retry logic, which works seamlessly with Edg max_tokens=1024, messages=[{"role": "user", "content": "Hello!"}] ) - print(message.content[0].text) + print(message.content) except APIError as e: print(f"API Error: {e}") ``` @@ -454,7 +491,7 @@ The Anthropic SDK includes built-in retry logic, which works seamlessly with Edg max_tokens: 1024, messages: [{ role: 'user', content: 'Hello!' 
}] }); - console.log(message.content[0].text); + console.log(message.content); } catch (error) { console.error('API Error:', error); } diff --git a/integrations/claude-code.mdx b/integrations/claude-code.mdx index 1601d02..746e3c2 100644 --- a/integrations/claude-code.mdx +++ b/integrations/claude-code.mdx @@ -69,30 +69,44 @@ claude --model gpt-4o "Refactor this function" ## Advanced Configuration -### Custom Headers +### Compression & Tags via Headers -You can add custom headers for analytics and filtering by setting additional environment variables: +Control token compression and add tags using HTTP headers: + +**Using Environment Variables:** ```bash -export ANTHROPIC_HEADERS='{"x-edgee-tags": "development,claude-code,team-backend"}' +export ANTHROPIC_HEADERS='{ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", + "x-edgee-tags": "development,claude-code,team-backend" +}' ``` -### Tags for Observability - -Tags help you categorize and filter requests in Edgee's analytics dashboard: +**Using Settings File (`~/.claude/settings.json`):** ```json { "apiEndpoint": "https://api.edgee.ai", "apiKey": "sk-edgee-...", "customHeaders": { + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", "x-edgee-tags": "production,claude-code,user-123" } } ``` +**Available Headers:** + +| Header | Type | Description | +|--------|------|-------------| +| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable token compression (overrides console settings) | +| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) | +| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering | + - Tags are comma-separated strings that appear in your Edgee observability dashboard, making it easy to track Claude Code usage separately from other applications. 
+ You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings. ## Streaming Responses diff --git a/integrations/langchain.mdx b/integrations/langchain.mdx index 5931fde..dc7ae70 100644 --- a/integrations/langchain.mdx +++ b/integrations/langchain.mdx @@ -114,6 +114,38 @@ uv run langchain_script.py \ --system "You are a creative poet" ``` +## Compression & Tags via Headers + +Control token compression and add tags for observability using the `default_headers` parameter: + +```python +from langchain_openai import ChatOpenAI +import os + +llm = ChatOpenAI( + base_url="https://api.edgee.ai/v1", + api_key=os.getenv("API_KEY"), + model="gpt-4o", + default_headers={ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", # Target 80% compression + "x-edgee-tags": "production,langchain,rag-pipeline", + } +) +``` + +**Available Headers:** + +| Header | Type | Description | +|--------|------|-------------| +| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable token compression (overrides console settings) | +| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) | +| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering | + + + You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings. + + ## Advanced Features ### Chains diff --git a/integrations/openai-sdk.mdx b/integrations/openai-sdk.mdx index 9657d4c..80eebf3 100644 --- a/integrations/openai-sdk.mdx +++ b/integrations/openai-sdk.mdx @@ -9,13 +9,12 @@ Edgee provides an **OpenAI-compatible API**, which means you can use the officia ## Why Use OpenAI SDK with Edgee? 
-- **Up to 50% Cost Reduction**: Automatic token compression on every request -- **Real-Time Savings**: See exactly how many tokens and dollars you've saved +- **Up to 50% Cost Reduction**: Automatic token compression when enabled via headers or console - **No Code Changes**: Use your existing OpenAI SDK code as-is - **Multi-Provider Access**: Route to OpenAI, Anthropic, Google, and more through one API - **Automatic Failover**: Built-in reliability with fallback providers -- **Cost Tracking**: Real-time visibility into token usage and costs -- **Observability**: Request tracing and logging across all providers +- **Cost Tracking**: Real-time visibility into token usage in the Edgee dashboard +- **Observability**: Request tracing, compression metrics, and logging across all providers ## Installation @@ -87,9 +86,9 @@ print(completion.choices[0].message.content) -## Cost Tracking & Compression +## Token Usage Tracking -Every response includes token compression and cost metrics through the standard OpenAI `usage` field: +Access standard OpenAI token usage metrics in every response: @@ -109,13 +108,9 @@ const completion = await openai.chat.completions.create({ }); console.log(completion.choices[0].message.content); - -// Access compression metrics (if compression was applied) -if (completion.compression) { - console.log(`Tokens saved: ${completion.compression.saved_tokens}`); - console.log(`Compression rate: ${(completion.compression.rate * 100).toFixed(1)}%`); -} -console.log(`Total tokens: ${completion.usage.total_tokens}`); +console.log(`Prompt tokens: ${completion.usage?.prompt_tokens}`); +console.log(`Completion tokens: ${completion.usage?.completion_tokens}`); +console.log(`Total tokens: ${completion.usage?.total_tokens}`); ``` ```python title="Python" @@ -135,20 +130,119 @@ completion = client.chat.completions.create( ) print(completion.choices[0].message.content) - -# Access compression metrics (if compression was applied) -if hasattr(completion, 
'compression') and completion.compression: - print(f"Tokens saved: {completion.compression.saved_tokens}") - print(f"Compression rate: {completion.compression.rate * 100:.1f}%") +print(f"Prompt tokens: {completion.usage.prompt_tokens}") +print(f"Completion tokens: {completion.usage.completion_tokens}") print(f"Total tokens: {completion.usage.total_tokens}") ``` - Edgee extends the OpenAI API response with a `compression` field containing compression metrics (`input_tokens`, `saved_tokens`, `rate`). All standard OpenAI fields remain unchanged. + When compression is enabled, `prompt_tokens` reflects the compressed token count. View detailed compression metrics in the [Edgee dashboard](/features/observability). +## Compression & Tags via Headers + +When using the OpenAI SDK with Edgee, you can control token compression and add tags using HTTP headers: + +### Enabling Compression + + + +```typescript title="TypeScript" +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "https://api.edgee.ai/v1", + apiKey: process.env.EDGEE_API_KEY, + defaultHeaders: { + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", // Target 80% compression (0.0-1.0) + }, +}); + +// All requests will use compression with 80% target rate +const completion = await openai.chat.completions.create({ + model: "gpt-4o", + messages: [ + { role: "user", content: "Analyze this long document..." 
} + ], +}); +``` + +```python title="Python" +from openai import OpenAI +from os import getenv + +client = OpenAI( + base_url="https://api.edgee.ai/v1", + api_key=getenv("EDGEE_API_KEY"), + default_headers={ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", # Target 80% compression (0.0-1.0) + }, +) + +# All requests will use compression with 80% target rate +completion = client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "user", "content": "Analyze this long document..."} + ], +) +``` + + + +### Adding Tags for Analytics + +Combine compression with tags to track requests in your dashboard: + + + +```typescript title="TypeScript" +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "https://api.edgee.ai/v1", + apiKey: process.env.EDGEE_API_KEY, + defaultHeaders: { + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", + "x-edgee-tags": "production,openai-sdk,user-123", + }, +}); +``` + +```python title="Python" +from openai import OpenAI +from os import getenv + +client = OpenAI( + base_url="https://api.edgee.ai/v1", + api_key=getenv("EDGEE_API_KEY"), + default_headers={ + "x-edgee-enable-compression": "true", + "x-edgee-compression-rate": "0.8", + "x-edgee-tags": "production,openai-sdk,user-123", + }, +) +``` + + + +**Available Headers:** + +| Header | Type | Description | +|--------|------|-------------| +| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable token compression for requests (overrides console settings) | +| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) | +| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering | + + + You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings for specific requests. 
+ + ## Advanced Usage ### Function Calling (Tools) @@ -250,63 +344,6 @@ else: -### Tags - -You can add tags to your requests for analytics and filtering using the `x-edgee-tags` header: - - - -```typescript title="TypeScript" -import OpenAI from "openai"; - -const openai = new OpenAI({ - baseURL: "https://api.edgee.ai/v1", - apiKey: process.env.EDGEE_API_KEY, - defaultHeaders: { - "x-edgee-tags": "production,user-123,summarization", - }, -}); - -// All requests will include these tags -const completion = await openai.chat.completions.create({ - model: "gpt-4o", - messages: [ - { role: "user", content: "What is the capital of France?" } - ], -}); -``` - -```python title="Python" -from openai import OpenAI -from os import getenv - -# Tags applied to all requests via default headers -client = OpenAI( - base_url="https://api.edgee.ai/v1", - api_key=getenv("EDGEE_API_KEY"), - default_headers={ - "x-edgee-tags": "production,user-123,summarization", - }, -) - -# Or per-request using extra_headers -completion = client.chat.completions.create( - model="gpt-4o", - messages=[ - {"role": "user", "content": "What is the capital of France?"}, - ], - extra_headers={ - "x-edgee-tags": "one-off-tag,experiment", - }, -) -``` - - - - - Tags are comma-separated strings in the header. They help you categorize and filter requests in Edgee's analytics dashboard. - - ### Streaming Responses Edgee supports streaming responses for real-time token delivery: