diff --git a/integrations/anthropic-sdk.mdx b/integrations/anthropic-sdk.mdx
index 8c719c3..b4c393b 100644
--- a/integrations/anthropic-sdk.mdx
+++ b/integrations/anthropic-sdk.mdx
@@ -44,11 +44,11 @@ The Anthropic SDK provides official Python and TypeScript clients for interactin
]
)
- print(message.content[0].text)
+ print(message.content)
- # Access token usage and cost metrics
- print(f"Tokens saved: {message.usage.input_tokens_original - message.usage.input_tokens}")
- print(f"Total tokens: {message.usage.input_tokens + message.usage.output_tokens}")
+ # Access token usage
+ print(f"Input tokens: {message.usage.input_tokens}")
+ print(f"Output tokens: {message.usage.output_tokens}")
```
@@ -71,11 +71,11 @@ The Anthropic SDK provides official Python and TypeScript clients for interactin
]
});
- console.log(message.content[0].text);
+ console.log(message.content);
- // Access token usage and cost metrics
- console.log(`Tokens saved: ${message.usage.input_tokens_original - message.usage.input_tokens}`);
- console.log(`Total tokens: ${message.usage.input_tokens + message.usage.output_tokens}`);
+ // Access token usage
+ console.log(`Input tokens: ${message.usage.input_tokens}`);
+ console.log(`Output tokens: ${message.usage.output_tokens}`);
```
@@ -136,9 +136,9 @@ Stream responses for real-time token delivery:
-## Cost Tracking & Compression
+## Token Usage Tracking
-Every Edgee response includes token compression metrics in a dedicated `compression` field:
+Access standard Anthropic token usage metrics in every response:
@@ -146,7 +146,7 @@ Every Edgee response includes token compression metrics in a dedicated `compress
from anthropic import Anthropic
client = Anthropic(
- base_url="https://api.edgee.ai/v1",
+ base_url="https://api.edgee.ai",
api_key=os.environ.get("EDGEE_API_KEY"),
)
@@ -156,15 +156,9 @@ Every Edgee response includes token compression metrics in a dedicated `compress
messages=[{"role": "user", "content": "Analyze this long document..."}]
)
- print(message.content[0].text)
-
- # Compression metrics (if compression was applied)
- if hasattr(message, 'compression') and message.compression:
- compression = message.compression
- print(f"Original input tokens: {compression.input_tokens}")
- print(f"Compressed input tokens: {message.usage.input_tokens}")
- print(f"Tokens saved: {compression.saved_tokens}")
- print(f"Compression rate: {compression.rate * 100:.1f}%")
+ print(message.content)
+ print(f"Input tokens: {message.usage.input_tokens}")
+ print(f"Output tokens: {message.usage.output_tokens}")
```
@@ -173,7 +167,7 @@ Every Edgee response includes token compression metrics in a dedicated `compress
import Anthropic from '@anthropic-ai/sdk';
const client = new Anthropic({
- baseURL: 'https://api.edgee.ai/v1',
+ baseURL: 'https://api.edgee.ai',
apiKey: process.env.EDGEE_API_KEY,
});
@@ -183,24 +177,119 @@ Every Edgee response includes token compression metrics in a dedicated `compress
messages: [{ role: 'user', content: 'Analyze this long document...' }]
});
- console.log(message.content[0].text);
-
- // Compression metrics (if compression was applied)
- if (message.compression) {
- const compression = message.compression;
- console.log(`Original input tokens: ${compression.input_tokens}`);
- console.log(`Compressed input tokens: ${message.usage.input_tokens}`);
- console.log(`Tokens saved: ${compression.saved_tokens}`);
- console.log(`Compression rate: ${(compression.rate * 100).toFixed(1)}%`);
- }
+ console.log(message.content);
+ console.log(`Input tokens: ${message.usage.input_tokens}`);
+ console.log(`Output tokens: ${message.usage.output_tokens}`);
```
- Edgee extends the Anthropic API response with a `compression` field containing compression metrics (`input_tokens`, `saved_tokens`, `rate`). All standard Anthropic fields remain unchanged.
+ When compression is enabled, `input_tokens` reflects the compressed token count. View detailed compression metrics in the [Edgee dashboard](/features/observability).
+## Compression & Tags via Headers
+
+When using the Anthropic SDK with Edgee, you can control token compression and add tags using HTTP headers:
+
+### Enabling Compression
+
+
+
+ ```python
+ from anthropic import Anthropic
+
+ client = Anthropic(
+ base_url="https://api.edgee.ai",
+ api_key=os.environ.get("EDGEE_API_KEY"),
+ default_headers={
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8", # Target 80% compression (0.0-1.0)
+ }
+ )
+
+ # All requests will use compression with 80% target rate
+ message = client.messages.create(
+ model="claude-sonnet-4.5",
+ max_tokens=1024,
+ messages=[{"role": "user", "content": "Analyze this document..."}]
+ )
+ ```
+
+
+
+ ```typescript
+ import Anthropic from '@anthropic-ai/sdk';
+
+ const client = new Anthropic({
+ baseURL: 'https://api.edgee.ai',
+ apiKey: process.env.EDGEE_API_KEY,
+ defaultHeaders: {
+ 'x-edgee-enable-compression': 'true',
+ 'x-edgee-compression-rate': '0.8', // Target 80% compression (0.0-1.0)
+ }
+ });
+
+ // All requests will use compression with 80% target rate
+ const message = await client.messages.create({
+ model: 'claude-sonnet-4.5',
+ max_tokens: 1024,
+ messages: [{ role: 'user', content: 'Analyze this document...' }]
+ });
+ ```
+
+
+
+### Adding Tags for Analytics
+
+Combine compression with tags to track requests in your dashboard:
+
+
+
+ ```python
+ from anthropic import Anthropic
+
+ client = Anthropic(
+ base_url="https://api.edgee.ai",
+ api_key=os.environ.get("EDGEE_API_KEY"),
+ default_headers={
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8",
+ "x-edgee-tags": "production,anthropic-sdk,user-123"
+ }
+ )
+ ```
+
+
+
+ ```typescript
+ import Anthropic from '@anthropic-ai/sdk';
+
+ const client = new Anthropic({
+ baseURL: 'https://api.edgee.ai',
+ apiKey: process.env.EDGEE_API_KEY,
+ defaultHeaders: {
+ 'x-edgee-enable-compression': 'true',
+ 'x-edgee-compression-rate': '0.8',
+ 'x-edgee-tags': 'production,anthropic-sdk,user-123'
+ }
+ });
+ ```
+
+
+
+**Available Headers:**
+
+| Header | Type | Description |
+|--------|------|-------------|
+| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable or disable token compression for requests (overrides console settings) |
+| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) |
+| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering |
+
+
+ You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings for specific requests.
+
+
## Multi-Provider Access
With Edgee, you can access models from multiple providers using the same Anthropic SDK client and compare costs across providers:
@@ -359,58 +448,6 @@ Use Claude's tool calling with Edgee:
-## Tags for Observability
-
-Add custom tags to track and filter requests in Edgee's dashboard:
-
-
-
- ```python
- from anthropic import Anthropic
-
- client = Anthropic(
- base_url="https://api.edgee.ai",
- api_key=os.environ.get("EDGEE_API_KEY"),
- default_headers={
- "x-edgee-tags": "production,anthropic-sdk,user-123"
- }
- )
-
- # All requests from this client will include these tags
- message = client.messages.create(
- model="claude-sonnet-4.5",
- max_tokens=1024,
- messages=[{"role": "user", "content": "Hello!"}]
- )
- ```
-
-
-
- ```typescript
- import Anthropic from '@anthropic-ai/sdk';
-
- const client = new Anthropic({
- baseURL: 'https://api.edgee.ai',
- apiKey: process.env.EDGEE_API_KEY,
- defaultHeaders: {
- 'x-edgee-tags': 'production,anthropic-sdk,user-123'
- }
- });
-
- // All requests from this client will include these tags
- const message = await client.messages.create({
- model: 'claude-sonnet-4.5',
- max_tokens: 1024,
- messages: [{ role: 'user', content: 'Hello!' }]
- });
- ```
-
-
-
-
- Tags are comma-separated strings that help you categorize and filter requests in Edgee's analytics dashboard.
-
-
## Error Handling and Retries
The Anthropic SDK includes built-in retry logic, which works seamlessly with Edgee's automatic failover:
@@ -432,7 +469,7 @@ The Anthropic SDK includes built-in retry logic, which works seamlessly with Edg
max_tokens=1024,
messages=[{"role": "user", "content": "Hello!"}]
)
- print(message.content[0].text)
+ print(message.content)
except APIError as e:
print(f"API Error: {e}")
```
@@ -454,7 +491,7 @@ The Anthropic SDK includes built-in retry logic, which works seamlessly with Edg
max_tokens: 1024,
messages: [{ role: 'user', content: 'Hello!' }]
});
- console.log(message.content[0].text);
+ console.log(message.content);
} catch (error) {
console.error('API Error:', error);
}
diff --git a/integrations/claude-code.mdx b/integrations/claude-code.mdx
index 1601d02..746e3c2 100644
--- a/integrations/claude-code.mdx
+++ b/integrations/claude-code.mdx
@@ -69,30 +69,44 @@ claude --model gpt-4o "Refactor this function"
## Advanced Configuration
-### Custom Headers
+### Compression & Tags via Headers
-You can add custom headers for analytics and filtering by setting additional environment variables:
+Control token compression and add tags using HTTP headers:
+
+**Using Environment Variables:**
```bash
-export ANTHROPIC_HEADERS='{"x-edgee-tags": "development,claude-code,team-backend"}'
+export ANTHROPIC_HEADERS='{
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8",
+ "x-edgee-tags": "development,claude-code,team-backend"
+}'
```
-### Tags for Observability
-
-Tags help you categorize and filter requests in Edgee's analytics dashboard:
+**Using Settings File (`~/.claude/settings.json`):**
```json
{
"apiEndpoint": "https://api.edgee.ai",
"apiKey": "sk-edgee-...",
"customHeaders": {
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8",
"x-edgee-tags": "production,claude-code,user-123"
}
}
```
+**Available Headers:**
+
+| Header | Type | Description |
+|--------|------|-------------|
+| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable or disable token compression (overrides console settings) |
+| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) |
+| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering |
+
- Tags are comma-separated strings that appear in your Edgee observability dashboard, making it easy to track Claude Code usage separately from other applications.
+ You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings.
## Streaming Responses
diff --git a/integrations/langchain.mdx b/integrations/langchain.mdx
index 5931fde..dc7ae70 100644
--- a/integrations/langchain.mdx
+++ b/integrations/langchain.mdx
@@ -114,6 +114,38 @@ uv run langchain_script.py \
--system "You are a creative poet"
```
+## Compression & Tags via Headers
+
+Control token compression and add tags for observability using the `default_headers` parameter:
+
+```python
+from langchain_openai import ChatOpenAI
+import os
+
+llm = ChatOpenAI(
+ base_url="https://api.edgee.ai/v1",
+ api_key=os.getenv("API_KEY"),
+ model="gpt-4o",
+ default_headers={
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8", # Target 80% compression
+ "x-edgee-tags": "production,langchain,rag-pipeline",
+ }
+)
+```
+
+**Available Headers:**
+
+| Header | Type | Description |
+|--------|------|-------------|
+| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable or disable token compression (overrides console settings) |
+| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) |
+| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering |
+
+
+ You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings.
+
+
## Advanced Features
### Chains
diff --git a/integrations/openai-sdk.mdx b/integrations/openai-sdk.mdx
index 9657d4c..80eebf3 100644
--- a/integrations/openai-sdk.mdx
+++ b/integrations/openai-sdk.mdx
@@ -9,13 +9,12 @@ Edgee provides an **OpenAI-compatible API**, which means you can use the officia
## Why Use OpenAI SDK with Edgee?
-- **Up to 50% Cost Reduction**: Automatic token compression on every request
-- **Real-Time Savings**: See exactly how many tokens and dollars you've saved
+- **Up to 50% Cost Reduction**: Automatic token compression when enabled via headers or console
- **No Code Changes**: Use your existing OpenAI SDK code as-is
- **Multi-Provider Access**: Route to OpenAI, Anthropic, Google, and more through one API
- **Automatic Failover**: Built-in reliability with fallback providers
-- **Cost Tracking**: Real-time visibility into token usage and costs
-- **Observability**: Request tracing and logging across all providers
+- **Cost Tracking**: Real-time visibility into token usage in the Edgee dashboard
+- **Observability**: Request tracing, compression metrics, and logging across all providers
## Installation
@@ -87,9 +86,9 @@ print(completion.choices[0].message.content)
-## Cost Tracking & Compression
+## Token Usage Tracking
-Every response includes token compression and cost metrics through the standard OpenAI `usage` field:
+Access standard OpenAI token usage metrics in every response:
@@ -109,13 +108,9 @@ const completion = await openai.chat.completions.create({
});
console.log(completion.choices[0].message.content);
-
-// Access compression metrics (if compression was applied)
-if (completion.compression) {
- console.log(`Tokens saved: ${completion.compression.saved_tokens}`);
- console.log(`Compression rate: ${(completion.compression.rate * 100).toFixed(1)}%`);
-}
-console.log(`Total tokens: ${completion.usage.total_tokens}`);
+console.log(`Prompt tokens: ${completion.usage?.prompt_tokens}`);
+console.log(`Completion tokens: ${completion.usage?.completion_tokens}`);
+console.log(`Total tokens: ${completion.usage?.total_tokens}`);
```
```python title="Python"
@@ -135,20 +130,119 @@ completion = client.chat.completions.create(
)
print(completion.choices[0].message.content)
-
-# Access compression metrics (if compression was applied)
-if hasattr(completion, 'compression') and completion.compression:
- print(f"Tokens saved: {completion.compression.saved_tokens}")
- print(f"Compression rate: {completion.compression.rate * 100:.1f}%")
+print(f"Prompt tokens: {completion.usage.prompt_tokens}")
+print(f"Completion tokens: {completion.usage.completion_tokens}")
print(f"Total tokens: {completion.usage.total_tokens}")
```
- Edgee extends the OpenAI API response with a `compression` field containing compression metrics (`input_tokens`, `saved_tokens`, `rate`). All standard OpenAI fields remain unchanged.
+ When compression is enabled, `prompt_tokens` reflects the compressed token count. View detailed compression metrics in the [Edgee dashboard](/features/observability).
+## Compression & Tags via Headers
+
+When using the OpenAI SDK with Edgee, you can control token compression and add tags using HTTP headers:
+
+### Enabling Compression
+
+
+
+```typescript title="TypeScript"
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+ baseURL: "https://api.edgee.ai/v1",
+ apiKey: process.env.EDGEE_API_KEY,
+ defaultHeaders: {
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8", // Target 80% compression (0.0-1.0)
+ },
+});
+
+// All requests will use compression with 80% target rate
+const completion = await openai.chat.completions.create({
+ model: "gpt-4o",
+ messages: [
+ { role: "user", content: "Analyze this long document..." }
+ ],
+});
+```
+
+```python title="Python"
+from openai import OpenAI
+from os import getenv
+
+client = OpenAI(
+ base_url="https://api.edgee.ai/v1",
+ api_key=getenv("EDGEE_API_KEY"),
+ default_headers={
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8", # Target 80% compression (0.0-1.0)
+ },
+)
+
+# All requests will use compression with 80% target rate
+completion = client.chat.completions.create(
+ model="gpt-4o",
+ messages=[
+ {"role": "user", "content": "Analyze this long document..."}
+ ],
+)
+```
+
+
+
+### Adding Tags for Analytics
+
+Combine compression with tags to track requests in your dashboard:
+
+
+
+```typescript title="TypeScript"
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+ baseURL: "https://api.edgee.ai/v1",
+ apiKey: process.env.EDGEE_API_KEY,
+ defaultHeaders: {
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8",
+ "x-edgee-tags": "production,openai-sdk,user-123",
+ },
+});
+```
+
+```python title="Python"
+from openai import OpenAI
+from os import getenv
+
+client = OpenAI(
+ base_url="https://api.edgee.ai/v1",
+ api_key=getenv("EDGEE_API_KEY"),
+ default_headers={
+ "x-edgee-enable-compression": "true",
+ "x-edgee-compression-rate": "0.8",
+ "x-edgee-tags": "production,openai-sdk,user-123",
+ },
+)
+```
+
+
+
+**Available Headers:**
+
+| Header | Type | Description |
+|--------|------|-------------|
+| `x-edgee-enable-compression` | `"true"` or `"false"` | Enable or disable token compression for requests (overrides console settings) |
+| `x-edgee-compression-rate` | `string` | Target compression rate (0.0-1.0, default 0.75) |
+| `x-edgee-tags` | `string` | Comma-separated tags for analytics and filtering |
+
+
+ You can also enable compression organization-wide or per API key in the [Edgee console](/features/token-compression#enabling-token-compression). Headers override console settings for specific requests.
+
+
## Advanced Usage
### Function Calling (Tools)
@@ -250,63 +344,6 @@ else:
-### Tags
-
-You can add tags to your requests for analytics and filtering using the `x-edgee-tags` header:
-
-
-
-```typescript title="TypeScript"
-import OpenAI from "openai";
-
-const openai = new OpenAI({
- baseURL: "https://api.edgee.ai/v1",
- apiKey: process.env.EDGEE_API_KEY,
- defaultHeaders: {
- "x-edgee-tags": "production,user-123,summarization",
- },
-});
-
-// All requests will include these tags
-const completion = await openai.chat.completions.create({
- model: "gpt-4o",
- messages: [
- { role: "user", content: "What is the capital of France?" }
- ],
-});
-```
-
-```python title="Python"
-from openai import OpenAI
-from os import getenv
-
-# Tags applied to all requests via default headers
-client = OpenAI(
- base_url="https://api.edgee.ai/v1",
- api_key=getenv("EDGEE_API_KEY"),
- default_headers={
- "x-edgee-tags": "production,user-123,summarization",
- },
-)
-
-# Or per-request using extra_headers
-completion = client.chat.completions.create(
- model="gpt-4o",
- messages=[
- {"role": "user", "content": "What is the capital of France?"},
- ],
- extra_headers={
- "x-edgee-tags": "one-off-tag,experiment",
- },
-)
-```
-
-
-
-
- Tags are comma-separated strings in the header. They help you categorize and filter requests in Edgee's analytics dashboard.
-
-
### Streaming Responses
Edgee supports streaming responses for real-time token delivery: