From f15dfe12438f52b211cc80a9e561fb3603635e46 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Tue, 3 Feb 2026 17:40:38 +0100
Subject: [PATCH 1/8] feat: add compression fields to Rust SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add optional enable_compression and compression_rate fields to support
token compression configuration in the AI gateway.

- Add enable_compression (bool) and compression_rate (f64) to InputObject
- Add builder methods with_enable_compression() and with_compression_rate()
- Include fields in both send() and stream() request bodies
- Fields are gateway-internal and not serialized to providers

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/client.rs | 18 ++++++++++++++++++
 src/models.rs | 22 ++++++++++++++++++++++
 2 files changed, 40 insertions(+)
diff --git a/src/client.rs b/src/client.rs
index 5d04b4e8..4a88c6a0 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -47,6 +47,8 @@ struct ParsedInput {
     tools: Option<Vec<Tool>>,
     tool_choice: Option<serde_json::Value>,
     tags: Option<Vec<String>>,
+    enable_compression: Option<bool>,
+    compression_rate: Option<f64>,
 }
 
 /// Main client for interacting with the Edgee AI Gateway
@@ -117,6 +119,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }
 
         let response = self
             .client
@@ -190,6 +198,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }
 
         let response = self
             .client
@@ -270,12 +284,16 @@ impl Edgee {
                 tools: None,
                 tool_choice: None,
                 tags: None,
+                enable_compression: None,
+                compression_rate: None,
             },
             Input::Object(obj) => ParsedInput {
                 messages: obj.messages,
                 tools: obj.tools,
                 tool_choice: obj.tool_choice,
                 tags: obj.tags,
+                enable_compression: obj.enable_compression,
+                compression_rate: obj.compression_rate,
             },
         }
     }
diff --git a/src/models.rs b/src/models.rs
index b5f78451..7731c3ce 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -193,6 +193,14 @@ pub struct InputObject {
     pub tool_choice: Option<serde_json::Value>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tags: Option<Vec<String>>,
+    /// Enable token compression for this request (overrides API key settings if present)
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub enable_compression: Option<bool>,
+    /// Compression rate for this request (0.0-1.0, overrides API key settings if present)
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub compression_rate: Option<f64>,
 }
 
 impl InputObject {
@@ -203,6 +211,8 @@ impl InputObject {
             tools: None,
             tool_choice: None,
             tags: None,
+            enable_compression: None,
+            compression_rate: None,
         }
     }
 
@@ -223,6 +233,18 @@ impl InputObject {
         self.tags = Some(tags);
         self
     }
+
+    /// Enable or disable token compression for this request
+    pub fn with_enable_compression(mut self, enable: bool) -> Self {
+        self.enable_compression = Some(enable);
+        self
+    }
+
+    /// Set compression rate for this request (0.0-1.0)
+    pub fn with_compression_rate(mut self, rate: f64) -> Self {
+        self.compression_rate = Some(rate);
+        self
+    }
 }
 
 /// Token usage information

From bee672f9b32b6b51b7876976a190cfb23f0990e7 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 15:48:22 +0100
Subject: [PATCH 2/8] feat: add compression response field and rename builder
 method

- Rename with_enable_compression to with_compression for better API consistency
- Add Compression struct with input_tokens, saved_tokens, and rate fields
- Add optional compression field to SendResponse for root-level compression data
---
 src/models.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/models.rs b/src/models.rs
index 7731c3ce..9f2d78d8 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -235,7 +235,7 @@ impl InputObject {
     }
 
     /// Enable or disable token compression for this request
-    pub fn with_enable_compression(mut self, enable: bool) -> Self {
+    pub fn with_compression(mut self, enable: bool) -> Self {
         self.enable_compression = Some(enable);
         self
     }
@@ -255,6 +255,14 @@ pub struct Usage {
     pub total_tokens: u32,
 }
 
+/// Token compression metrics carried in the optional `compression` field of [`SendResponse`]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Compression {
+    pub input_tokens: u32,
+    pub saved_tokens: u32,
+    pub rate: f64,
+}
+
 /// Choice in a non-streaming response
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Choice {
@@ -273,6 +281,8 @@ pub struct SendResponse {
     pub choices: Vec<Choice>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub usage: Option<Usage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub compression: Option<Compression>,
 }
 
 impl SendResponse {

From 1be772beb99b485687a918d778ce4003c9cb9bb9 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:04:47 +0100
Subject: [PATCH 3/8] test: add compression response field tests

- Add test for response with compression field deserialization
- Add test for response without compression field
- Add test for InputObject with_compression builder method
---
 src/models.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/src/models.rs b/src/models.rs
index 9f2d78d8..66c5d407 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -363,3 +363,68 @@ impl StreamChunk {
             .and_then(|c| c.finish_reason.as_deref())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_send_response_with_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            },
+            "compression": {
+                "input_tokens": 100,
+                "saved_tokens": 42,
+                "rate": 0.6102003642987249
+            }
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        let compression = response.compression.expect("response should include compression");
+        assert_eq!(compression.input_tokens, 100);
+        assert_eq!(compression.saved_tokens, 42);
+        // Tolerance instead of `==`: serde_json's default float parsing is best-effort.
+        assert!((compression.rate - 0.6102003642987249).abs() < 1e-12);
+    }
+
+    #[test]
+    fn test_send_response_without_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }]
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        assert!(response.compression.is_none());
+    }
+
+    #[test]
+    fn test_input_object_with_compression_builder() {
+        let input = InputObject::new(vec![Message::user("Hello")])
+            .with_compression(true)
+            .with_compression_rate(0.5);
+
+        assert_eq!(input.enable_compression, Some(true));
+        assert_eq!(input.compression_rate, Some(0.5));
+    }
+}

From 9352dec8e85d662ca307cd8814f81bec571c494c Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:16:00 +0100
Subject: [PATCH 4/8] docs: add compression field documentation to README

- Add example showing how to access compression data in responses
- Add compression info to features list
---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index 42fb4964..915de558 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,17 @@ println!("{:?}", response.finish_reason());         // Finish reason
 if let Some(tool_calls) = response.tool_calls() {    // Tool calls (if any)
     println!("{:?}", tool_calls);
 }
+
+// Access usage and compression info
+if let Some(usage) = &response.usage {
+    println!("Tokens used: {}", usage.total_tokens);
+}
+
+if let Some(compression) = &response.compression {
+    println!("Input tokens: {}", compression.input_tokens);
+    println!("Saved tokens: {}", compression.saved_tokens);
+    println!("Compression rate: {:.2}", compression.rate);
+}
 ```
 
 ## Stream Method
@@ -80,6 +91,7 @@ while let Some(result) = stream.next().await {
 - ✅ **OpenAI-compatible** - Works with any model supported by Edgee
 - ✅ **Streaming** - First-class support with `Stream` trait
 - ✅ **Tool calling** - Full support for function calling
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero-cost abstractions** - Efficient implementation with minimal overhead
 
 ## Documentation

From 21399dc72fc7618db2e97b3526c482ef8ebb9fba Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:23:32 +0100
Subject: [PATCH 5/8] docs: add compression example

- Add example showing how to enable compression using builder pattern
- Demonstrate accessing compression metrics from response
- Show usage information alongside compression data
---
 examples/compression.rs | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 examples/compression.rs

diff --git a/examples/compression.rs b/examples/compression.rs
new file mode 100644
index 00000000..4c2a40aa
--- /dev/null
+++ b/examples/compression.rs
@@ -0,0 +1,62 @@
+//! Example: Token compression with Edgee Gateway SDK
+//!
+//! This example demonstrates how to:
+//! 1. Enable compression for a request using the builder pattern
+//! 2. Set a custom compression rate
+//! 3. Access compression metrics from the response
+
+use edgee::{Edgee, InputObject, Message};
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Create client from environment variables (EDGEE_API_KEY)
+    let client = Edgee::from_env()?;
+
+    println!("{}", "=".repeat(70));
+    println!("Edgee Token Compression Example");
+    println!("{}", "=".repeat(70));
+    println!();
+
+    // Example: Request with compression enabled
+    println!("Example: Request with compression enabled");
+    println!("{}", "-".repeat(70));
+
+    // Create input with compression settings using builder pattern
+    let input = InputObject::new(vec![Message::user(
+        "Explain quantum computing in simple terms.",
+    )])
+    .with_compression(true)
+    .with_compression_rate(0.5);
+
+    let response = client.send("gpt-4o", input).await?;
+
+    println!("Response: {}", response.text().unwrap_or(""));
+    println!();
+
+    // Display usage information
+    if let Some(usage) = &response.usage {
+        println!("Token Usage:");
+        println!("  Prompt tokens:     {}", usage.prompt_tokens);
+        println!("  Completion tokens: {}", usage.completion_tokens);
+        println!("  Total tokens:      {}", usage.total_tokens);
+        println!();
+    }
+
+    // Display compression information
+    if let Some(compression) = &response.compression {
+        println!("Compression Metrics:");
+        println!("  Input tokens:  {}", compression.input_tokens);
+        println!("  Saved tokens:  {}", compression.saved_tokens);
+        println!("  Compression rate: {:.2}%", compression.rate * 100.0);
+        println!("  Token savings: {} tokens saved!", compression.saved_tokens);
+    } else {
+        println!("No compression data available in response.");
+        println!("Note: Compression data is only returned when compression is enabled");
+        println!("      and supported by your API key configuration.");
+    }
+
+    println!();
+    println!("{}", "=".repeat(70));
+
+    Ok(())
+}

From a299f5eaaf2070a4b71c5351c3daa027884f8cdf Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:26:39 +0100
Subject: [PATCH 6/8] docs: update compression example with larger input
 context

- Add substantial AI history document as context (~3000+ chars)
- Demonstrate meaningful compression on large input
- Show percentage of tokens saved
- Explain that compression works on input tokens
---
 examples/compression.rs | 80 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 73 insertions(+), 7 deletions(-)

diff --git a/examples/compression.rs b/examples/compression.rs
index 4c2a40aa..8d788e85 100644
--- a/examples/compression.rs
+++ b/examples/compression.rs
@@ -1,12 +1,61 @@
 //! Example: Token compression with Edgee Gateway SDK
 //!
 //! This example demonstrates how to:
-//! 1. Enable compression for a request using the builder pattern
+//! 1. Enable compression for a request with a large input context using the builder pattern
 //! 2. Set a custom compression rate
 //! 3. Access compression metrics from the response
+//!
+//! Note: Compression works on INPUT tokens, so this example includes a large
+//! context document to demonstrate meaningful compression savings.
 
 use edgee::{Edgee, InputObject, Message};
 
+// Large context document to demonstrate input compression
+const LARGE_CONTEXT: &str = r#"
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a 
+transformative technology that influences nearly every aspect of modern life. 
+The field began in earnest in the 1950s when pioneers like Alan Turing and 
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These 
+rule-based approaches dominated the field through the 1970s and 1980s, with 
+systems like MYCIN demonstrating practical applications in medical diagnosis. 
+However, these early systems were limited by their inability to learn from data 
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with 
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the 
+2010s, with the advent of deep learning and the availability of massive datasets 
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has 
+particularly accelerated progress in natural language understanding and generation. 
+These models, trained on vast amounts of text data, can perform a wide range of 
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias, 
+interpretability, safety, and ethical considerations continue to be areas of 
+active research and debate. The AI community is working to ensure that these 
+powerful technologies are developed and deployed responsibly, with consideration 
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential 
+breakthroughs in areas like artificial general intelligence, quantum machine 
+learning, and brain-computer interfaces. The integration of AI into daily life 
+will likely deepen, raising important questions about human-AI collaboration, 
+workforce transformation, and the future of human cognition itself.
+"#;
+
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // Create client from environment variables (EDGEE_API_KEY)
@@ -17,14 +66,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("{}", "=".repeat(70));
     println!();
 
-    // Example: Request with compression enabled
-    println!("Example: Request with compression enabled");
+    // Example: Request with compression enabled and large input
+    println!("Example: Large context with compression enabled");
     println!("{}", "-".repeat(70));
+    println!("Input context length: {} characters", LARGE_CONTEXT.len());
+    println!();
 
     // Create input with compression settings using builder pattern
-    let input = InputObject::new(vec![Message::user(
-        "Explain quantum computing in simple terms.",
-    )])
+    let input = InputObject::new(vec![
+        Message::system(LARGE_CONTEXT),
+        Message::user("Based on the context above, summarize the key milestones in AI development in 3 bullet points."),
+    ])
     .with_compression(true)
     .with_compression_rate(0.5);
 
@@ -48,7 +100,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         println!("  Input tokens:  {}", compression.input_tokens);
         println!("  Saved tokens:  {}", compression.saved_tokens);
         println!("  Compression rate: {:.2}%", compression.rate * 100.0);
-        println!("  Token savings: {} tokens saved!", compression.saved_tokens);
+        
+        let savings_pct = if compression.input_tokens > 0 {
+            (compression.saved_tokens as f64 / compression.input_tokens as f64) * 100.0
+        } else {
+            0.0
+        };
+        println!("  Savings: {:.1}% of input tokens saved!", savings_pct);
+        println!();
+        // Saturate instead of subtracting: both counts come from the gateway response, and
+        // `saved_tokens > input_tokens` would otherwise underflow the u32 (panic in debug).
+        let processed = compression.input_tokens.saturating_sub(compression.saved_tokens);
+        println!("  💡 Without compression, this request would have used");
+        println!("     {} input tokens.", compression.input_tokens);
+        println!("     With compression, only {} tokens were processed!", processed);
     } else {
         println!("No compression data available in response.");
         println!("Note: Compression data is only returned when compression is enabled");
@@ -60,3 +125,4 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     Ok(())
 }
+

From 9f257d7545c2073e9578b0161421294a5e0ac8d2 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:35:32 +0100
Subject: [PATCH 7/8] fix: move large context to user message in compression
 example

- Only USER messages are compressed, not system messages
- Update example to put context in user message
- Add clarifying comment about compression behavior
---
 examples/compression.rs | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/examples/compression.rs b/examples/compression.rs
index 8d788e85..8e439d1d 100644
--- a/examples/compression.rs
+++ b/examples/compression.rs
@@ -5,8 +5,9 @@
 //! 2. Set a custom compression rate
 //! 3. Access compression metrics from the response
 //!
-//! Note: Compression works on INPUT tokens, so this example includes a large
-//! context document to demonstrate meaningful compression savings.
+//! IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+//! This example includes a large context in the user message to demonstrate meaningful
+//! compression savings.
 
 use edgee::{Edgee, InputObject, Message};
 
@@ -67,18 +68,22 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!();
 
     // Example: Request with compression enabled and large input
-    println!("Example: Large context with compression enabled");
+    println!("Example: Large user message with compression enabled");
     println!("{}", "-".repeat(70));
     println!("Input context length: {} characters", LARGE_CONTEXT.len());
     println!();
 
+    // NOTE: Only USER messages are compressed
+    // Put the large context in the user message to demonstrate compression
+    let user_message = format!(
+        "Here is some context about AI:\n\n{}\n\nBased on this context, summarize the key milestones in AI development in 3 bullet points.",
+        LARGE_CONTEXT
+    );
+
     // Create input with compression settings using builder pattern
-    let input = InputObject::new(vec![
-        Message::system(LARGE_CONTEXT),
-        Message::user("Based on the context above, summarize the key milestones in AI development in 3 bullet points."),
-    ])
-    .with_compression(true)
-    .with_compression_rate(0.5);
+    let input = InputObject::new(vec![Message::user(user_message)])
+        .with_compression(true)
+        .with_compression_rate(0.5);
 
     let response = client.send("gpt-4o", input).await?;
 

From 04e7784b4c44f6bf8e6323d3b3a51300dca214cc Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:39:07 +0100
Subject: [PATCH 8/8] style: apply rustfmt to compression example

---
 examples/compression.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/compression.rs b/examples/compression.rs
index 8e439d1d..da86c45a 100644
--- a/examples/compression.rs
+++ b/examples/compression.rs
@@ -105,7 +105,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         println!("  Input tokens:  {}", compression.input_tokens);
         println!("  Saved tokens:  {}", compression.saved_tokens);
         println!("  Compression rate: {:.2}%", compression.rate * 100.0);
-        
+
         let savings_pct = if compression.input_tokens > 0 {
             (compression.saved_tokens as f64 / compression.input_tokens as f64) * 100.0
         } else {
@@ -130,4 +130,3 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     Ok(())
 }
-