diff --git a/README.md b/README.md
index 709dca6..cf0e664 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,17 @@ const response = await edgee.send({
 console.log(response.text); // Text content
 console.log(response.finishReason); // Finish reason
 console.log(response.toolCalls); // Tool calls (if any)
+
+// Access usage and compression info
+if (response.usage) {
+  console.log(`Tokens used: ${response.usage.total_tokens}`);
+}
+
+if (response.compression) {
+  console.log(`Input tokens: ${response.compression.input_tokens}`);
+  console.log(`Saved tokens: ${response.compression.saved_tokens}`);
+  console.log(`Compression rate: ${response.compression.rate}`);
+}
 ```

 ## Stream Method
@@ -67,6 +78,7 @@ for await (const chunk of edgee.stream('gpt-4o', 'Tell me a story')) {
 - ✅ **Streaming** - Real-time response streaming
 - ✅ **Tool calling** - Full support for function calling
 - ✅ **Flexible input** - Accept strings or structured objects
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero dependencies** - Lightweight and fast

 ## Documentation
diff --git a/example/compression.ts b/example/compression.ts
new file mode 100644
index 0000000..1e1af71
--- /dev/null
+++ b/example/compression.ts
@@ -0,0 +1,137 @@
+/**
+ * Example: Token compression with Edgee Gateway SDK
+ *
+ * This example demonstrates how to:
+ * 1. Enable compression for a request with a large input context
+ * 2. Set a custom compression rate
+ * 3. Access compression metrics from the response
+ *
+ * IMPORTANT: Only USER messages are compressed; system messages are not.
+ * This example therefore places a large context in the user message to
+ * demonstrate meaningful compression savings.
+ */
+
+import Edgee from "edgee";
+
+const edgee = new Edgee(process.env.EDGEE_API_KEY);
+
+// Large context document to demonstrate input compression
+const LARGE_CONTEXT = `
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a
+transformative technology that influences nearly every aspect of modern life.
+The field began in earnest in the 1950s, when pioneers like Alan Turing and
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These
+rule-based approaches dominated the field through the 1970s and 1980s, with
+systems like MYCIN demonstrating practical applications in medical diagnosis.
+However, these early systems were limited by their inability to learn from data
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the
+2010s, with the advent of deep learning and the availability of massive datasets
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has
+particularly accelerated progress in natural language understanding and generation.
+These models, trained on vast amounts of text data, can perform a wide range of
+language tasks with remarkable proficiency.
+
+Despite this progress, significant challenges remain. Issues of bias,
+interpretability, safety, and ethical considerations continue to be areas of
+active research and debate. The AI community is working to ensure that these
+powerful technologies are developed and deployed responsibly, with consideration
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential
+breakthroughs in areas like artificial general intelligence, quantum machine
+learning, and brain-computer interfaces. The integration of AI into daily life
+will likely deepen, raising important questions about human-AI collaboration,
+workforce transformation, and the future of human cognition itself.
+`;
+
+console.log("=".repeat(70));
+console.log("Edgee Token Compression Example");
+console.log("=".repeat(70));
+console.log();
+
+// Example: Request with compression enabled and large input
+console.log("Example: Large user message with compression enabled");
+console.log("-".repeat(70));
+console.log(`Input context length: ${LARGE_CONTEXT.length} characters`);
+console.log();
+
+// NOTE: Only USER messages are compressed
+// Put the large context in the user message to demonstrate compression
+const userMessage = `Here is some context about AI:
+
+${LARGE_CONTEXT}
+
+Based on this context, summarize the key milestones in AI development in 3 bullet points.`;
+
+const response = await edgee.send({
+  model: "gpt-4o",
+  input: {
+    messages: [{ role: "user", content: userMessage }],
+    enable_compression: true,
+    compression_rate: 0.5,
+  },
+});
+
+console.log(`Response: ${response.text}`);
+console.log();
+
+// Display usage information
+if (response.usage) {
+  console.log("Token Usage:");
+  console.log(`  Prompt tokens: ${response.usage.prompt_tokens}`);
+  console.log(`  Completion tokens: ${response.usage.completion_tokens}`);
+  console.log(`  Total tokens: ${response.usage.total_tokens}`);
+  console.log();
+}
+
+// Display compression information
+if (response.compression) {
+  console.log("Compression Metrics:");
+  console.log(`  Input tokens: ${response.compression.input_tokens}`);
+  console.log(`  Saved tokens: ${response.compression.saved_tokens}`);
+  console.log(
+    `  Compression rate: ${(response.compression.rate * 100).toFixed(2)}%`
+  );
+  const savingsPct =
+    response.compression.input_tokens > 0
+      ? (response.compression.saved_tokens /
+          response.compression.input_tokens) *
+        100
+      : 0;
+  console.log(`  Savings: ${savingsPct.toFixed(1)}% of input tokens saved!`);
+  console.log();
+  console.log(`  💡 Without compression, this request would have used`);
+  console.log(`  ${response.compression.input_tokens} input tokens.`);
+  console.log(
+    `  With compression, only ${
+      response.compression.input_tokens - response.compression.saved_tokens
+    } tokens were processed!`
+  );
+} else {
+  console.log("No compression data available in response.");
+  console.log(
+    "Note: Compression data is only returned when compression is enabled"
+  );
+  console.log("      and supported by your API key configuration.");
+}
+
+console.log();
+console.log("=".repeat(70));
+
diff --git a/src/index.ts b/src/index.ts
index dc060e8..ee44504 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -43,6 +43,8 @@ export interface InputObject {
   tools?: Tool[];
   tool_choice?: ToolChoice;
   tags?: string[];
+  enable_compression?: boolean; // Enable token compression (gateway-internal, not sent to providers)
+  compression_rate?: number; // Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
 }

 export interface SendOptions {
@@ -67,6 +69,11 @@ export class SendResponse {
     completion_tokens: number;
     total_tokens: number;
   };
+  compression?: {
+    input_tokens: number;
+    saved_tokens: number;
+    rate: number;
+  };

   constructor(
     choices: Choice[],
@@ -74,10 +81,16 @@ export class SendResponse {
       prompt_tokens: number;
       completion_tokens: number;
       total_tokens: number;
+    },
+    compression?: {
+      input_tokens: number;
+      saved_tokens: number;
+      rate: number;
     }
   ) {
     this.choices = choices;
     this.usage = usage;
+    this.compression = compression;
   }

   get text(): string | null {
@@ -187,6 +200,8 @@ export default class Edgee {
       if (input.tools) body.tools = input.tools;
       if (input.tool_choice) body.tool_choice = input.tool_choice;
       if (input.tags) body.tags = input.tags;
+      if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression;
+      if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate;
     }

     const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
@@ -209,10 +224,15 @@ export default class Edgee {
         prompt_tokens: number;
         completion_tokens: number;
         total_tokens: number;
-      }
+      };
+      compression?: {
+        input_tokens: number;
+        saved_tokens: number;
+        rate: number;
+      };
     };

-    return new SendResponse(data.choices, data.usage);
+    return new SendResponse(data.choices, data.usage, data.compression);
   }

   private async *_handleStreamingResponse(
@@ -288,6 +308,8 @@ export default class Edgee {
       if (input.tools) body.tools = input.tools;
       if (input.tool_choice) body.tool_choice = input.tool_choice;
       if (input.tags) body.tags = input.tags;
+      if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression;
+      if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate;
     }

     yield* this._handleStreamingResponse(
diff --git a/tests/index.test.ts b/tests/index.test.ts
index 3b1e646..6ed19d9 100644
--- a/tests/index.test.ts
+++ b/tests/index.test.ts
@@ -491,6 +491,73 @@ describe('Edgee', () => {
       );
     });

+    it('should handle response with compression field', async () => {
+      const mockResponse = {
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: 'Response',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 100,
+          completion_tokens: 50,
+          total_tokens: 150,
+        },
+        compression: {
+          input_tokens: 100,
+          saved_tokens: 42,
+          rate: 0.6102003642987249,
+        },
+      };
+
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => mockResponse,
+      });
+
+      const result = await client.send({
+        model: 'gpt-4',
+        input: 'Test',
+      });
+
+      expect(result.compression).toBeDefined();
+      expect(result.compression?.input_tokens).toBe(100);
+      expect(result.compression?.saved_tokens).toBe(42);
+      expect(result.compression?.rate).toBe(0.6102003642987249);
+    });
+
+    it('should handle response without compression field', async () => {
+      const mockResponse = {
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: 'Response',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+      };
+
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => mockResponse,
+      });
+
+      const result = await client.send({
+        model: 'gpt-4',
+        input: 'Test',
+      });
+
+      expect(result.compression).toBeUndefined();
+    });
+
     it('should throw error when API returns non-OK status', async () => {
       mockFetch.mockResolvedValueOnce({
         ok: false,
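A note on the streaming path: the second `enable_compression` hunk in `src/index.ts` forwards both flags on streamed requests, but `compression` metrics are only added to `SendResponse`, so a stream compresses its input without returning a savings report. Below is a minimal sketch of that combination, assuming the `stream(model, input)` signature shown in the README; the chunk shape is not part of this diff, so it is logged raw rather than guessed at.

```ts
import Edgee from "edgee";

const edgee = new Edgee(process.env.EDGEE_API_KEY);

// Both compression flags are forwarded on the streaming request body,
// mirroring the non-streaming path in `send`.
for await (const chunk of edgee.stream("gpt-4o", {
  messages: [{ role: "user", content: "Summarize the history of AI." }],
  enable_compression: true,
  compression_rate: 0.5, // 0.0-1.0, per the InputObject comment above
})) {
  // This diff does not define the streamed chunk shape; log it as-is.
  console.log(chunk);
}
```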