diff --git a/README.md b/README.md
index 42fb4964..915de558 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,17 @@ println!("{:?}", response.finish_reason()); // Finish reason
 if let Some(tool_calls) = response.tool_calls() { // Tool calls (if any)
     println!("{:?}", tool_calls);
 }
+
+// Access usage and compression info
+if let Some(usage) = &response.usage {
+    println!("Tokens used: {}", usage.total_tokens);
+}
+
+if let Some(compression) = &response.compression {
+    println!("Input tokens: {}", compression.input_tokens);
+    println!("Saved tokens: {}", compression.saved_tokens);
+    println!("Compression rate: {:.2}", compression.rate);
+}
 ```

 ## Stream Method
@@ -80,6 +91,7 @@ while let Some(result) = stream.next().await {
 - ✅ **OpenAI-compatible** - Works with any model supported by Edgee
 - ✅ **Streaming** - First-class support with `Stream` trait
 - ✅ **Tool calling** - Full support for function calling
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero-cost abstractions** - Efficient implementation with minimal overhead

 ## Documentation
diff --git a/examples/compression.rs b/examples/compression.rs
new file mode 100644
index 00000000..da86c45a
--- /dev/null
+++ b/examples/compression.rs
@@ -0,0 +1,132 @@
+//! Example: Token compression with the Edgee Gateway SDK
+//!
+//! This example demonstrates how to:
+//! 1. Enable compression for a request with a large input context using the builder pattern
+//! 2. Set a custom compression rate
+//! 3. Access compression metrics from the response
+//!
+//! IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+//! This example includes a large context in the user message to demonstrate meaningful
+//! compression savings.
+
+use edgee::{Edgee, InputObject, Message};
+
+// Large context document to demonstrate input compression
+const LARGE_CONTEXT: &str = r#"
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a
+transformative technology that influences nearly every aspect of modern life.
+The field began in earnest in the 1950s when pioneers like Alan Turing and
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These
+rule-based approaches dominated the field through the 1970s and 1980s, with
+systems like MYCIN demonstrating practical applications in medical diagnosis.
+However, these early systems were limited by their inability to learn from data
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the
+2010s, with the advent of deep learning and the availability of massive datasets
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has
+particularly accelerated progress in natural language understanding and generation.
+These models, trained on vast amounts of text data, can perform a wide range of
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias,
+interpretability, safety, and ethical considerations continue to be areas of
+active research and debate. The AI community is working to ensure that these
+powerful technologies are developed and deployed responsibly, with consideration
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential
+breakthroughs in areas like artificial general intelligence, quantum machine
+learning, and brain-computer interfaces. The integration of AI into daily life
+will likely deepen, raising important questions about human-AI collaboration,
+workforce transformation, and the future of human cognition itself.
+"#;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Create client from environment variables (EDGEE_API_KEY)
+    let client = Edgee::from_env()?;
+
+    println!("{}", "=".repeat(70));
+    println!("Edgee Token Compression Example");
+    println!("{}", "=".repeat(70));
+    println!();
+
+    // Example: Request with compression enabled and large input
+    println!("Example: Large user message with compression enabled");
+    println!("{}", "-".repeat(70));
+    println!("Input context length: {} characters", LARGE_CONTEXT.len());
+    println!();
+
+    // NOTE: Only USER messages are compressed.
+    // Put the large context in the user message to demonstrate compression.
+    let user_message = format!(
+        "Here is some context about AI:\n\n{}\n\nBased on this context, summarize the key milestones in AI development in 3 bullet points.",
+        LARGE_CONTEXT
+    );
+
+    // Create input with compression settings using the builder pattern
+    let input = InputObject::new(vec![Message::user(user_message)])
+        .with_compression(true)
+        .with_compression_rate(0.5);
+
+    let response = client.send("gpt-4o", input).await?;
+
+    println!("Response: {}", response.text().unwrap_or(""));
+    println!();
+
+    // Display usage information
+    if let Some(usage) = &response.usage {
+        println!("Token Usage:");
+        println!("  Prompt tokens: {}", usage.prompt_tokens);
+        println!("  Completion tokens: {}", usage.completion_tokens);
+        println!("  Total tokens: {}", usage.total_tokens);
+        println!();
+    }
+
+    // Display compression information
+    if let Some(compression) = &response.compression {
+        println!("Compression Metrics:");
+        println!("  Input tokens: {}", compression.input_tokens);
+        println!("  Saved tokens: {}", compression.saved_tokens);
+        println!("  Compression rate: {:.2}%", compression.rate * 100.0);
+
+        let savings_pct = if compression.input_tokens > 0 {
+            (compression.saved_tokens as f64 / compression.input_tokens as f64) * 100.0
+        } else {
+            0.0
+        };
+        println!("  Savings: {:.1}% of input tokens saved!", savings_pct);
+        println!();
+        println!("  💡 Without compression, this request would have used");
+        println!("     {} input tokens.", compression.input_tokens);
+        println!(
+            "     With compression, only {} tokens were processed!",
+            compression.input_tokens - compression.saved_tokens
+        );
+    } else {
+        println!("No compression data available in response.");
+        println!("Note: Compression data is only returned when compression is enabled");
+        println!("      and supported by your API key configuration.");
+    }
+
+    println!();
+    println!("{}", "=".repeat(70));
+
+    Ok(())
+}
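The savings arithmetic in the example is worth pulling out on its own. Below is a minimal standalone sketch of the same computation; the `savings_pct` helper is named after the example's local variable and is illustrative, not part of the SDK:

```rust
/// Percentage of input tokens saved, guarding against division by zero.
/// Mirrors the arithmetic in examples/compression.rs above.
fn savings_pct(input_tokens: u32, saved_tokens: u32) -> f64 {
    if input_tokens > 0 {
        (saved_tokens as f64 / input_tokens as f64) * 100.0
    } else {
        0.0
    }
}

fn main() {
    // Using the values from the test fixture in src/models.rs below
    // (input_tokens: 100, saved_tokens: 42): the gateway forwards
    // 100 - 42 = 58 tokens, a 42% reduction of the input.
    println!("{:.1}% saved", savings_pct(100, 42)); // prints "42.0% saved"
}
```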
diff --git a/src/client.rs b/src/client.rs
index 5d04b4e8..4a88c6a0 100644
--- a/src/client.rs
+++ b/src/client.rs
@@ -47,6 +47,8 @@ struct ParsedInput {
     tools: Option<Vec<Tool>>,
     tool_choice: Option<ToolChoice>,
     tags: Option<Vec<String>>,
+    enable_compression: Option<bool>,
+    compression_rate: Option<f64>,
 }

 /// Main client for interacting with the Edgee AI Gateway
@@ -117,6 +119,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }

         let response = self
             .client
@@ -190,6 +198,12 @@ impl Edgee {
         if let Some(tags) = parsed.tags {
             body["tags"] = json!(tags);
         }
+        if let Some(enable_compression) = parsed.enable_compression {
+            body["enable_compression"] = json!(enable_compression);
+        }
+        if let Some(compression_rate) = parsed.compression_rate {
+            body["compression_rate"] = json!(compression_rate);
+        }

         let response = self
             .client
@@ -270,12 +284,16 @@ impl Edgee {
                 tools: None,
                 tool_choice: None,
                 tags: None,
+                enable_compression: None,
+                compression_rate: None,
             },
             Input::Object(obj) => ParsedInput {
                 messages: obj.messages,
                 tools: obj.tools,
                 tool_choice: obj.tool_choice,
                 tags: obj.tags,
+                enable_compression: obj.enable_compression,
+                compression_rate: obj.compression_rate,
             },
         }
     }
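Note that the body-building pattern above adds the compression keys only when they are explicitly set, which is what lets request-level settings override API-key defaults: an absent key leaves the gateway's key-level configuration in effect. A minimal standalone sketch of that pattern using serde_json (the base body here is abbreviated; the real client also sets model, messages, and so on):

```rust
use serde_json::json;

fn main() {
    // Stand-ins for the ParsedInput fields; `None` means "omit the key",
    // so the API key's gateway-side defaults remain in effect.
    let enable_compression: Option<bool> = Some(true);
    let compression_rate: Option<f64> = None;

    let mut body = json!({ "model": "gpt-4o" });
    if let Some(enable_compression) = enable_compression {
        body["enable_compression"] = json!(enable_compression);
    }
    if let Some(compression_rate) = compression_rate {
        body["compression_rate"] = json!(compression_rate);
    }

    // Only the field that was set appears in the payload:
    // {"enable_compression":true,"model":"gpt-4o"}
    println!("{}", body);
}
```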
diff --git a/src/models.rs b/src/models.rs
index b5f78451..66c5d407 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -193,6 +193,14 @@ pub struct InputObject {
     pub tool_choice: Option<ToolChoice>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tags: Option<Vec<String>>,
+    /// Enable token compression for this request (overrides API key settings if present).
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub enable_compression: Option<bool>,
+    /// Compression rate for this request (0.0-1.0, overrides API key settings if present).
+    /// This is a gateway-internal field and is never sent to providers.
+    #[serde(default, skip_serializing)]
+    pub compression_rate: Option<f64>,
 }

 impl InputObject {
@@ -203,6 +211,8 @@ impl InputObject {
             tools: None,
             tool_choice: None,
             tags: None,
+            enable_compression: None,
+            compression_rate: None,
         }
     }

@@ -223,6 +233,18 @@ impl InputObject {
         self.tags = Some(tags);
         self
     }
+
+    /// Enable or disable token compression for this request
+    pub fn with_compression(mut self, enable: bool) -> Self {
+        self.enable_compression = Some(enable);
+        self
+    }
+
+    /// Set the compression rate for this request (0.0-1.0)
+    pub fn with_compression_rate(mut self, rate: f64) -> Self {
+        self.compression_rate = Some(rate);
+        self
+    }
 }

 /// Token usage information
@@ -233,6 +255,14 @@ pub struct Usage {
     pub total_tokens: u32,
 }

+/// Compression information
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Compression {
+    pub input_tokens: u32,
+    pub saved_tokens: u32,
+    pub rate: f64,
+}
+
 /// Choice in a non-streaming response
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Choice {
@@ -251,6 +281,8 @@ pub struct SendResponse {
     pub choices: Vec<Choice>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub usage: Option<Usage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub compression: Option<Compression>,
 }

 impl SendResponse {
@@ -331,3 +363,68 @@ impl StreamChunk {
             .and_then(|c| c.finish_reason.as_deref())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_send_response_with_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150
+            },
+            "compression": {
+                "input_tokens": 100,
+                "saved_tokens": 42,
+                "rate": 0.6102003642987249
+            }
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        assert!(response.compression.is_some());
+        let compression = response.compression.unwrap();
+        assert_eq!(compression.input_tokens, 100);
+        assert_eq!(compression.saved_tokens, 42);
+        assert_eq!(compression.rate, 0.6102003642987249);
+    }
+
+    #[test]
+    fn test_send_response_without_compression() {
+        let json = r#"{
+            "id": "test-id",
+            "object": "chat.completion",
+            "created": 1234567890,
+            "model": "gpt-4",
+            "choices": [{
+                "index": 0,
+                "message": {"role": "assistant", "content": "Response"},
+                "finish_reason": "stop"
+            }]
+        }"#;
+
+        let response: SendResponse = serde_json::from_str(json).unwrap();
+        assert!(response.compression.is_none());
+    }
+
+    #[test]
+    fn test_input_object_with_compression_builder() {
+        let input = InputObject::new(vec![Message::user("Hello")])
+            .with_compression(true)
+            .with_compression_rate(0.5);
+
+        assert_eq!(input.enable_compression, Some(true));
+        assert_eq!(input.compression_rate, Some(0.5));
+    }
+}
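Because both request paths in src/client.rs go through parse_input() and add the same two keys to the body, the compression builder settings apply to streaming as well as to send(). A minimal sketch, assuming stream() accepts the same (model, input) arguments as send(), that `futures::StreamExt` provides the `next()` used in the README's Stream Method section, and that chunks implement Debug; none of those signatures are confirmed by this diff:

```rust
use edgee::{Edgee, InputObject, Message};
use futures::StreamExt; // assumed: brings `next()` into scope for the stream

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = Edgee::from_env()?;

    // The same builder settings flow through parse_input() for streaming,
    // since send() and stream() serialize ParsedInput identically.
    let input = InputObject::new(vec![Message::user("Summarize the history of AI")])
        .with_compression(true)
        .with_compression_rate(0.5);

    let mut stream = client.stream("gpt-4o", input).await?;
    while let Some(result) = stream.next().await {
        // Debug-print each chunk; whether streaming responses also carry
        // compression metrics is not shown in this diff.
        println!("{:?}", result?);
    }
    Ok(())
}
```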