edgee-ai · CLEMENTINATOR · Feb 4, 2026 · Feb 3, 2026 · Feb 4, 2026 · Feb 4, 2026
@@ -46,6 +46,17 @@ println!("{:?}", response.finish_reason());         // Finish reason
 if let Some(tool_calls) = response.tool_calls() {    // Tool calls (if any)
     println!("{:?}", tool_calls);
 }
+
+// Access usage and compression info
+if let Some(usage) = &response.usage {
+    println!("Tokens used: {}", usage.total_tokens);
+}
+
+if let Some(compression) = &response.compression {
+    println!("Input tokens: {}", compression.input_tokens);
+    println!("Saved tokens: {}", compression.saved_tokens);
+    println!("Compression rate: {:.2}", compression.rate);
+}
 ```
 
 ## Stream Method
@@ -80,6 +91,7 @@ while let Some(result) = stream.next().await {
 - ✅ **OpenAI-compatible** - Works with any model supported by Edgee
 - ✅ **Streaming** - First-class support with `Stream` trait
 - ✅ **Tool calling** - Full support for function calling
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero-cost abstractions** - Efficient implementation with minimal overhead
 
 ## Documentation

@@ -0,0 +1,132 @@
+//! Example: Token compression with Edgee Gateway SDK
+//!
+//! This example demonstrates how to:
+//! 1. Enable compression for a request with a large input context using the builder pattern
+//! 2. Set a custom compression rate
+//! 3. Access compression metrics from the response
+//!
+//! IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+//! This example includes a large context in the user message to demonstrate meaningful
+//! compression savings.
+
+use edgee::{Edgee, InputObject, Message};
+
+// Long sample document that main() embeds in the user message so the request has enough input to compress
+const LARGE_CONTEXT: &str = r#"
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a 
+transformative technology that influences nearly every aspect of modern life. 
+The field began in earnest in the 1950s when pioneers like Alan Turing and 
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These 
+rule-based approaches dominated the field through the 1970s and 1980s, with 
+systems like MYCIN demonstrating practical applications in medical diagnosis. 
+However, these early systems were limited by their inability to learn from data 
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with 
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the 
+2010s, with the advent of deep learning and the availability of massive datasets 
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has 
+particularly accelerated progress in natural language understanding and generation. 
+These models, trained on vast amounts of text data, can perform a wide range of 
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias, 
+interpretability, safety, and ethical considerations continue to be areas of 
+active research and debate. The AI community is working to ensure that these 
+powerful technologies are developed and deployed responsibly, with consideration 
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential 
+breakthroughs in areas like artificial general intelligence, quantum machine 
+learning, and brain-computer interfaces. The integration of AI into daily life 
+will likely deepen, raising important questions about human-AI collaboration, 
+workforce transformation, and the future of human cognition itself.
+"#;
+
+/// Sends one compression-enabled request, then prints the reply, usage and compression metrics.
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Create client from environment variables (EDGEE_API_KEY)
+    let client = Edgee::from_env()?;
+
+    println!("{}", "=".repeat(70));
+    println!("Edgee Token Compression Example");
+    println!("{}", "=".repeat(70));
+    println!();
+
+    // Example: Request with compression enabled and large input
+    println!("Example: Large user message with compression enabled");
+    println!("{}", "-".repeat(70));
+    println!("Input context length: {} characters", LARGE_CONTEXT.len());
+    println!();
+
+    // NOTE: Only USER messages are compressed
+    // Put the large context in the user message to demonstrate compression
+    let user_message = format!(
+        "Here is some context about AI:\n\n{}\n\nBased on this context, summarize the key milestones in AI development in 3 bullet points.",
+        LARGE_CONTEXT
+    );
+
+    // Build the request: a single user message, compression switched on, requested rate 0.5
+    let input = InputObject::new(vec![Message::user(user_message)])
+        .with_compression(true)
+        .with_compression_rate(0.5);
+
+    let response = client.send("gpt-4o", input).await?;
+
+    println!("Response: {}", response.text().unwrap_or(""));
+    println!();
+
+    // Display usage information (only when the response carries a usage object)
+    if let Some(usage) = &response.usage {
+        println!("Token Usage:");
+        println!("  Prompt tokens:     {}", usage.prompt_tokens);
+        println!("  Completion tokens: {}", usage.completion_tokens);
+        println!("  Total tokens:      {}", usage.total_tokens);
+        println!();
+    }
+
+    // Display compression information
+    if let Some(compression) = &response.compression {
+        println!("Compression Metrics:");
+        println!("  Input tokens:  {}", compression.input_tokens);
+        println!("  Saved tokens:  {}", compression.saved_tokens);
+        println!("  Compression rate: {:.2}%", compression.rate * 100.0);
+
+        // Guard the division: a zero-token input reports 0% rather than NaN.
+        let savings_pct = match compression.input_tokens {
+            0 => 0.0,
+            total => f64::from(compression.saved_tokens) / f64::from(total) * 100.0,
+        };
+        println!("  Savings: {:.1}% of input tokens saved!", savings_pct);
+        println!();
+        // Saturate so a malformed response (saved > input) cannot underflow the u32 subtraction.
+        let processed = compression.input_tokens.saturating_sub(compression.saved_tokens);
+        println!("  💡 Without compression, this request would have used");
+        println!("     {} input tokens.", compression.input_tokens);
+        println!("     With compression, only {} tokens were processed!", processed);
+    } else {
+        println!("No compression data available in response.");
+        println!("Note: Compression data is only returned when compression is enabled");
+        println!("      and supported by your API key configuration.");
+    }
+
+    println!();
+    println!("{}", "=".repeat(70));
+
+    Ok(())
+}
@@ -47,6 +47,8 @@ struct ParsedInput {
     tools: Option<Vec<Tool>>,
     tool_choice: Option<serde_json::Value>,
     tags: Option<Vec<String>>,
+    enable_compression: Option<bool>,
+    compression_rate: Option<f64>,
 }
 
 /// Main client for interacting with the Edgee AI Gateway
@@ -117,6 +119,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }
 
         let response = self
             .client
@@ -190,6 +198,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }
 
         let response = self
             .client
@@ -270,12 +284,16 @@ impl Edgee {
                 tools: None,
                 tool_choice: None,
                 tags: None,
+                enable_compression: None,
+                compression_rate: None,
             },
             Input::Object(obj) => ParsedInput {
                 messages: obj.messages,
                 tools: obj.tools,
                 tool_choice: obj.tool_choice,
                 tags: obj.tags,
+                enable_compression: obj.enable_compression,
+                compression_rate: obj.compression_rate,
             },
         }
     }

@@ -193,6 +193,14 @@ pub struct InputObject {
     pub tool_choice: Option<serde_json::Value>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tags: Option<Vec<String>>,
+    /// Enable token compression for this request (overrides API key settings if present)
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub enable_compression: Option<bool>,
+    /// Compression rate for this request (0.0-1.0, overrides API key settings if present)
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub compression_rate: Option<f64>,
 }
 
 impl InputObject {
@@ -203,6 +211,8 @@ impl InputObject {
             tools: None,
             tool_choice: None,
             tags: None,
+            enable_compression: None,
+            compression_rate: None,
         }
     }
 
@@ -223,6 +233,18 @@ impl InputObject {
         self.tags = Some(tags);
         self
     }
+
+    /// Builder step: record an explicit compression on/off choice for this request.
+    ///
+    /// Consumes `self` and returns it with `enable_compression` set to `Some(enable)`.
+    pub fn with_compression(self, enable: bool) -> Self {
+        Self {
+            enable_compression: Some(enable),
+            ..self
+        }
+    }
+
+    /// Builder step: record the compression rate for this request (documented range 0.0-1.0).
+    ///
+    /// Consumes `self` and returns it with `compression_rate` set to `Some(rate)`; the value
+    /// is stored as given, with no range check performed here.
+    pub fn with_compression_rate(self, rate: f64) -> Self {
+        Self {
+            compression_rate: Some(rate),
+            ..self
+        }
+    }
 }
 
 /// Token usage information
@@ -233,6 +255,14 @@ pub struct Usage {
     pub total_tokens: u32,
 }
 
+/// Compression metrics attached to a response (exposed as the optional `SendResponse::compression`)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Compression {
+    pub input_tokens: u32, // presumably the input token count before compression — TODO confirm with gateway docs
+    pub saved_tokens: u32, // presumably tokens removed by compression (input_tokens - saved_tokens were processed) — TODO confirm
+    pub rate: f64, // NOTE(review): exact definition not visible here; the test fixture shows it is not simply saved/input — confirm
+}
+
 /// Choice in a non-streaming response
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Choice {
@@ -251,6 +281,8 @@ pub struct SendResponse {
     pub choices: Vec<Choice>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub usage: Option<Usage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub compression: Option<Compression>,
 }
 
 impl SendResponse {
@@ -331,3 +363,68 @@ impl StreamChunk {
             .and_then(|c| c.finish_reason.as_deref())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// A response carrying a `compression` object deserializes into `Some(Compression)`.
+    #[test]
+    fn test_send_response_with_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            },
+            "compression": {
+                "input_tokens": 100,
+                "saved_tokens": 42,
+                "rate": 0.6102003642987249
+            }
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        let compression = response
+            .compression
+            .expect("`compression` object should deserialize into Some");
+        assert_eq!(compression.input_tokens, 100);
+        assert_eq!(compression.saved_tokens, 42);
+        // Compare with a tolerance: serde_json's default float parsing is best-effort and is
+        // only guaranteed exact when the `float_roundtrip` feature is enabled.
+        assert!((compression.rate - 0.6102003642987249).abs() < 1e-12);
+    }
+
+    /// A response without a `compression` key still deserializes, yielding `None`.
+    #[test]
+    fn test_send_response_without_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }]
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        assert!(response.compression.is_none());
+    }
+
+    /// The builder methods store exactly the values they are given.
+    #[test]
+    fn test_input_object_with_compression_builder() {
+        let input = InputObject::new(vec![Message::user("Hello")])
+            .with_compression(true)
+            .with_compression_rate(0.5);
+
+        assert_eq!(input.enable_compression, Some(true));
+        // 0.5 is assigned directly (never parsed), so exact equality is safe here.
+        assert_eq!(input.compression_rate, Some(0.5));
+    }
+}