From f15dfe12438f52b211cc80a9e561fb3603635e46 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Tue, 3 Feb 2026 17:40:38 +0100 Subject: [PATCH 1/8] feat: add compression fields to Rust SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional enable_compression and compression_rate fields to support token compression configuration in the AI gateway. - Add enable_compression (bool) and compression_rate (f64) to InputObject - Add builder methods with_enable_compression() and with_compression_rate() - Include fields in both send() and stream() request bodies - Fields are gateway-internal and not serialized to providers 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/client.rs | 18 ++++++++++++++++++ src/models.rs | 22 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/client.rs b/src/client.rs index 5d04b4e8..4a88c6a0 100644 --- a/src/client.rs +++ b/src/client.rs @@ -47,6 +47,8 @@ struct ParsedInput { tools: Option>, tool_choice: Option, tags: Option>, + enable_compression: Option<bool>, + compression_rate: Option<f64>, } /// Main client for interacting with the Edgee AI Gateway @@ -117,6 +119,12 @@ impl Edgee { if let Some(tags) = parsed.tags { body["tags"] = json!(tags); } + if let Some(enable_compression) = parsed.enable_compression { + body["enable_compression"] = json!(enable_compression); + } + if let Some(compression_rate) = parsed.compression_rate { + body["compression_rate"] = json!(compression_rate); + } let response = self .client @@ -190,6 +198,12 @@ impl Edgee { if let Some(tags) = parsed.tags { body["tags"] = json!(tags); } + if let Some(enable_compression) = parsed.enable_compression { + body["enable_compression"] = json!(enable_compression); + } + if let Some(compression_rate) = parsed.compression_rate { + body["compression_rate"] = json!(compression_rate); + } let response = self .client @@ -270,12 +284,16 @@ impl Edgee { tools: None, tool_choice:
None, tags: None, + enable_compression: None, + compression_rate: None, }, Input::Object(obj) => ParsedInput { messages: obj.messages, tools: obj.tools, tool_choice: obj.tool_choice, tags: obj.tags, + enable_compression: obj.enable_compression, + compression_rate: obj.compression_rate, }, } } diff --git a/src/models.rs b/src/models.rs index b5f78451..7731c3ce 100644 --- a/src/models.rs +++ b/src/models.rs @@ -193,6 +193,14 @@ pub struct InputObject { pub tool_choice: Option, #[serde(skip_serializing_if = "Option::is_none")] pub tags: Option>, + /// Enable token compression for this request (overrides API key settings if present) + /// This is a gateway-internal field and is never sent to providers. + #[serde(default, skip_serializing)] + pub enable_compression: Option<bool>, + /// Compression rate for this request (0.0-1.0, overrides API key settings if present) + /// This is a gateway-internal field and is never sent to providers. + #[serde(default, skip_serializing)] + pub compression_rate: Option<f64>, } impl InputObject { @@ -203,6 +211,8 @@ impl InputObject { tools: None, tool_choice: None, tags: None, + enable_compression: None, + compression_rate: None, } } @@ -223,6 +233,18 @@ impl InputObject { self.tags = Some(tags); self } + + /// Enable or disable token compression for this request + pub fn with_enable_compression(mut self, enable: bool) -> Self { + self.enable_compression = Some(enable); + self + } + + /// Set compression rate for this request (0.0-1.0) + pub fn with_compression_rate(mut self, rate: f64) -> Self { + self.compression_rate = Some(rate); + self + } } /// Token usage information From bee672f9b32b6b51b7876976a190cfb23f0990e7 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 15:48:22 +0100 Subject: [PATCH 2/8] feat: add compression response field and rename builder method - Rename with_enable_compression to with_compression for better API consistency - Add Compression struct with input_tokens, saved_tokens, and rate fields - Add
optional compression field to SendResponse for root-level compression data --- src/models.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/models.rs b/src/models.rs index 7731c3ce..9f2d78d8 100644 --- a/src/models.rs +++ b/src/models.rs @@ -235,7 +235,7 @@ impl InputObject { } /// Enable or disable token compression for this request - pub fn with_enable_compression(mut self, enable: bool) -> Self { + pub fn with_compression(mut self, enable: bool) -> Self { self.enable_compression = Some(enable); self } @@ -255,6 +255,14 @@ pub struct Usage { pub total_tokens: u32, } +/// Compression information +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Compression { + pub input_tokens: u32, + pub saved_tokens: u32, + pub rate: f64, +} + /// Choice in a non-streaming response #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Choice { @@ -273,6 +281,8 @@ pub struct SendResponse { pub choices: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub usage: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub compression: Option<Compression>, } impl SendResponse { From 1be772beb99b485687a918d778ce4003c9cb9bb9 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:04:47 +0100 Subject: [PATCH 3/8] test: add compression response field tests - Add test for response with compression field deserialization - Add test for response without compression field - Add test for InputObject with_compression builder method --- src/models.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/models.rs b/src/models.rs index 9f2d78d8..66c5d407 100644 --- a/src/models.rs +++ b/src/models.rs @@ -363,3 +363,68 @@ impl StreamChunk { .and_then(|c| c.finish_reason.as_deref()) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_send_response_with_compression() { + let json = r#"{ + "id": "test-id", + "object": "chat.completion", + "created": 1234567890, +
"model": "gpt-4", + "choices": [{ + "index": 0, + "message": {"role": "assistant", "content": "Response"}, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150 + }, + "compression": { + "input_tokens": 100, + "saved_tokens": 42, + "rate": 0.6102003642987249 + } + }"#; + + let response: SendResponse = serde_json::from_str(json).unwrap(); + assert!(response.compression.is_some()); + let compression = response.compression.unwrap(); + assert_eq!(compression.input_tokens, 100); + assert_eq!(compression.saved_tokens, 42); + assert_eq!(compression.rate, 0.6102003642987249); + } + + #[test] + fn test_send_response_without_compression() { + let json = r#"{ + "id": "test-id", + "object": "chat.completion", + "created": 1234567890, + "model": "gpt-4", + "choices": [{ + "index": 0, + "message": {"role": "assistant", "content": "Response"}, + "finish_reason": "stop" + }] + }"#; + + let response: SendResponse = serde_json::from_str(json).unwrap(); + assert!(response.compression.is_none()); + } + + #[test] + fn test_input_object_with_compression_builder() { + let input = InputObject::new(vec![Message::user("Hello")]) + .with_compression(true) + .with_compression_rate(0.5); + + assert_eq!(input.enable_compression, Some(true)); + assert_eq!(input.compression_rate, Some(0.5)); + } +} From 9352dec8e85d662ca307cd8814f81bec571c494c Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:16:00 +0100 Subject: [PATCH 4/8] docs: add compression field documentation to README - Add example showing how to access compression data in responses - Add compression info to features list --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 42fb4964..915de558 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,17 @@ println!("{:?}", response.finish_reason()); // Finish reason if let Some(tool_calls) = response.tool_calls() { // Tool calls (if any) println!("{:?}", 
tool_calls); } + +// Access usage and compression info +if let Some(usage) = &response.usage { + println!("Tokens used: {}", usage.total_tokens); +} + +if let Some(compression) = &response.compression { + println!("Input tokens: {}", compression.input_tokens); + println!("Saved tokens: {}", compression.saved_tokens); + println!("Compression rate: {:.2}", compression.rate); +} ``` ## Stream Method @@ -80,6 +91,7 @@ while let Some(result) = stream.next().await { - ✅ **OpenAI-compatible** - Works with any model supported by Edgee - ✅ **Streaming** - First-class support with `Stream` trait - ✅ **Tool calling** - Full support for function calling +- ✅ **Compression info** - Access token compression metrics in responses - ✅ **Zero-cost abstractions** - Efficient implementation with minimal overhead ## Documentation From 21399dc72fc7618db2e97b3526c482ef8ebb9fba Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:23:32 +0100 Subject: [PATCH 5/8] docs: add compression example - Add example showing how to enable compression using builder pattern - Demonstrate accessing compression metrics from response - Show usage information alongside compression data --- examples/compression.rs | 62 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 examples/compression.rs diff --git a/examples/compression.rs b/examples/compression.rs new file mode 100644 index 00000000..4c2a40aa --- /dev/null +++ b/examples/compression.rs @@ -0,0 +1,62 @@ +//! Example: Token compression with Edgee Gateway SDK +//! +//! This example demonstrates how to: +//! 1. Enable compression for a request using the builder pattern +//! 2. Set a custom compression rate +//! 3. 
Access compression metrics from the response + +use edgee::{Edgee, InputObject, Message}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Create client from environment variables (EDGEE_API_KEY) + let client = Edgee::from_env()?; + + println!("{}", "=".repeat(70)); + println!("Edgee Token Compression Example"); + println!("{}", "=".repeat(70)); + println!(); + + // Example: Request with compression enabled + println!("Example: Request with compression enabled"); + println!("{}", "-".repeat(70)); + + // Create input with compression settings using builder pattern + let input = InputObject::new(vec![Message::user( + "Explain quantum computing in simple terms.", + )]) + .with_compression(true) + .with_compression_rate(0.5); + + let response = client.send("gpt-4o", input).await?; + + println!("Response: {}", response.text().unwrap_or("")); + println!(); + + // Display usage information + if let Some(usage) = &response.usage { + println!("Token Usage:"); + println!(" Prompt tokens: {}", usage.prompt_tokens); + println!(" Completion tokens: {}", usage.completion_tokens); + println!(" Total tokens: {}", usage.total_tokens); + println!(); + } + + // Display compression information + if let Some(compression) = &response.compression { + println!("Compression Metrics:"); + println!(" Input tokens: {}", compression.input_tokens); + println!(" Saved tokens: {}", compression.saved_tokens); + println!(" Compression rate: {:.2}%", compression.rate * 100.0); + println!(" Token savings: {} tokens saved!", compression.saved_tokens); + } else { + println!("No compression data available in response."); + println!("Note: Compression data is only returned when compression is enabled"); + println!(" and supported by your API key configuration."); + } + + println!(); + println!("{}", "=".repeat(70)); + + Ok(()) +} From a299f5eaaf2070a4b71c5351c3daa027884f8cdf Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:26:39 +0100 Subject: [PATCH 6/8] docs: update 
compression example with larger input context - Add substantial AI history document as context (~3000+ chars) - Demonstrate meaningful compression on large input - Show percentage of tokens saved - Explain that compression works on input tokens --- examples/compression.rs | 80 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/examples/compression.rs b/examples/compression.rs index 4c2a40aa..8d788e85 100644 --- a/examples/compression.rs +++ b/examples/compression.rs @@ -1,12 +1,61 @@ //! Example: Token compression with Edgee Gateway SDK //! //! This example demonstrates how to: -//! 1. Enable compression for a request using the builder pattern +//! 1. Enable compression for a request with a large input context using the builder pattern //! 2. Set a custom compression rate //! 3. Access compression metrics from the response +//! +//! Note: Compression works on INPUT tokens, so this example includes a large +//! context document to demonstrate meaningful compression savings. use edgee::{Edgee, InputObject, Message}; +// Large context document to demonstrate input compression +const LARGE_CONTEXT: &str = r#" +The History and Impact of Artificial Intelligence + +Artificial intelligence (AI) has evolved from a theoretical concept to a +transformative technology that influences nearly every aspect of modern life. +The field began in earnest in the 1950s when pioneers like Alan Turing and +John McCarthy laid the groundwork for machine intelligence. + +Early developments focused on symbolic reasoning and expert systems. These +rule-based approaches dominated the field through the 1970s and 1980s, with +systems like MYCIN demonstrating practical applications in medical diagnosis. +However, these early systems were limited by their inability to learn from data +and adapt to new situations. + +The resurgence of neural networks in the 1980s and 1990s, particularly with +backpropagation algorithms, opened new possibilities. 
Yet it wasn't until the +2010s, with the advent of deep learning and the availability of massive datasets +and computational power, that AI truly began to revolutionize industries. + +Modern AI applications span numerous domains: +- Natural language processing enables machines to understand and generate human language +- Computer vision allows machines to interpret visual information from the world +- Robotics combines AI with mechanical systems for autonomous operation +- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment +- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment +- Transportation is being transformed by autonomous vehicles and traffic optimization + +The development of large language models like GPT, BERT, and others has +particularly accelerated progress in natural language understanding and generation. +These models, trained on vast amounts of text data, can perform a wide range of +language tasks with remarkable proficiency. + +Despite remarkable progress, significant challenges remain. Issues of bias, +interpretability, safety, and ethical considerations continue to be areas of +active research and debate. The AI community is working to ensure that these +powerful technologies are developed and deployed responsibly, with consideration +for their societal impact. + +Looking forward, AI is expected to continue advancing rapidly, with potential +breakthroughs in areas like artificial general intelligence, quantum machine +learning, and brain-computer interfaces. The integration of AI into daily life +will likely deepen, raising important questions about human-AI collaboration, +workforce transformation, and the future of human cognition itself. 
+"#; + #[tokio::main] async fn main() -> Result<(), Box> { // Create client from environment variables (EDGEE_API_KEY) @@ -17,14 +66,17 @@ async fn main() -> Result<(), Box> { println!("{}", "=".repeat(70)); println!(); - // Example: Request with compression enabled - println!("Example: Request with compression enabled"); + // Example: Request with compression enabled and large input + println!("Example: Large context with compression enabled"); println!("{}", "-".repeat(70)); + println!("Input context length: {} characters", LARGE_CONTEXT.len()); + println!(); // Create input with compression settings using builder pattern - let input = InputObject::new(vec![Message::user( - "Explain quantum computing in simple terms.", - )]) + let input = InputObject::new(vec![ + Message::system(LARGE_CONTEXT), + Message::user("Based on the context above, summarize the key milestones in AI development in 3 bullet points."), + ]) .with_compression(true) .with_compression_rate(0.5); @@ -48,7 +100,20 @@ async fn main() -> Result<(), Box> { println!(" Input tokens: {}", compression.input_tokens); println!(" Saved tokens: {}", compression.saved_tokens); println!(" Compression rate: {:.2}%", compression.rate * 100.0); - println!(" Token savings: {} tokens saved!", compression.saved_tokens); + + let savings_pct = if compression.input_tokens > 0 { + (compression.saved_tokens as f64 / compression.input_tokens as f64) * 100.0 + } else { + 0.0 + }; + println!(" Savings: {:.1}% of input tokens saved!", savings_pct); + println!(); + println!(" 💡 Without compression, this request would have used"); + println!(" {} input tokens.", compression.input_tokens); + println!( + " With compression, only {} tokens were processed!", + compression.input_tokens - compression.saved_tokens + ); } else { println!("No compression data available in response."); println!("Note: Compression data is only returned when compression is enabled"); @@ -60,3 +125,4 @@ async fn main() -> Result<(), Box> { Ok(()) } + From 
9f257d7545c2073e9578b0161421294a5e0ac8d2 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:35:32 +0100 Subject: [PATCH 7/8] fix: move large context to user message in compression example - Only USER messages are compressed, not system messages - Update example to put context in user message - Add clarifying comment about compression behavior --- examples/compression.rs | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/examples/compression.rs b/examples/compression.rs index 8d788e85..8e439d1d 100644 --- a/examples/compression.rs +++ b/examples/compression.rs @@ -5,8 +5,9 @@ //! 2. Set a custom compression rate //! 3. Access compression metrics from the response //! -//! Note: Compression works on INPUT tokens, so this example includes a large -//! context document to demonstrate meaningful compression savings. +//! IMPORTANT: Only USER messages are compressed. System messages are not compressed. +//! This example includes a large context in the user message to demonstrate meaningful +//! compression savings. 
use edgee::{Edgee, InputObject, Message}; @@ -67,18 +68,22 @@ async fn main() -> Result<(), Box> { println!(); // Example: Request with compression enabled and large input - println!("Example: Large context with compression enabled"); + println!("Example: Large user message with compression enabled"); println!("{}", "-".repeat(70)); println!("Input context length: {} characters", LARGE_CONTEXT.len()); println!(); + // NOTE: Only USER messages are compressed + // Put the large context in the user message to demonstrate compression + let user_message = format!( + "Here is some context about AI:\n\n{}\n\nBased on this context, summarize the key milestones in AI development in 3 bullet points.", + LARGE_CONTEXT + ); + // Create input with compression settings using builder pattern - let input = InputObject::new(vec![ - Message::system(LARGE_CONTEXT), - Message::user("Based on the context above, summarize the key milestones in AI development in 3 bullet points."), - ]) - .with_compression(true) - .with_compression_rate(0.5); + let input = InputObject::new(vec![Message::user(user_message)]) + .with_compression(true) + .with_compression_rate(0.5); let response = client.send("gpt-4o", input).await?; From 04e7784b4c44f6bf8e6323d3b3a51300dca214cc Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:39:07 +0100 Subject: [PATCH 8/8] fmt --- examples/compression.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/compression.rs b/examples/compression.rs index 8e439d1d..da86c45a 100644 --- a/examples/compression.rs +++ b/examples/compression.rs @@ -105,7 +105,7 @@ async fn main() -> Result<(), Box> { println!(" Input tokens: {}", compression.input_tokens); println!(" Saved tokens: {}", compression.saved_tokens); println!(" Compression rate: {:.2}%", compression.rate * 100.0); - + let savings_pct = if compression.input_tokens > 0 { (compression.saved_tokens as f64 / compression.input_tokens as f64) * 100.0 } else { @@ -130,4 +130,3 @@ 
async fn main() -> Result<(), Box> { Ok(()) } -