From 1eaf20cc255530c1c1bcde47973c7118a5cf58c5 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Tue, 3 Feb 2026 17:56:43 +0100 Subject: [PATCH 1/7] feat: add optional compression fields to completion requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for compression configuration with optional fields that can be passed per request to override API key settings. - Add optional `enable_compression?: boolean` field to `InputObject` interface - Add optional `compression_rate?: number` field to `InputObject` interface - Include compression fields in request body for both `send()` and `stream()` methods - Compression fields are omitted from request when not provided - Fields are gateway-internal and not sent to providers 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/index.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/index.ts b/src/index.ts index dc060e8..15b9134 100644 --- a/src/index.ts +++ b/src/index.ts @@ -43,6 +43,8 @@ export interface InputObject { tools?: Tool[]; tool_choice?: ToolChoice; tags?: string[]; + enable_compression?: boolean; // Enable token compression (gateway-internal, not sent to providers) + compression_rate?: number; // Compression rate 0.0-1.0 (gateway-internal, not sent to providers) } export interface SendOptions { @@ -187,6 +189,8 @@ export default class Edgee { if (input.tools) body.tools = input.tools; if (input.tool_choice) body.tool_choice = input.tool_choice; if (input.tags) body.tags = input.tags; + if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression; + if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate; } const res = await fetch(`${this.baseUrl}/v1/chat/completions`, { @@ -288,6 +292,8 @@ export default class Edgee { if (input.tools) body.tools = input.tools; if (input.tool_choice) body.tool_choice = input.tool_choice; if (input.tags) body.tags = input.tags; + if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression; + if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate; } yield* this._handleStreamingResponse( From d25e3c5d89486a527c08358ab5323ebf564510e0 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 15:49:13 +0100 Subject: [PATCH 2/7] feat: add compression response field to SendResponse - Add compression interface with input_tokens, saved_tokens, and rate fields - Add optional compression field to SendResponse for root-level compression data - Update SendResponse constructor to accept compression parameter --- src/index.ts | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 15b9134..ee44504 100644 --- a/src/index.ts +++ b/src/index.ts @@ -69,6 +69,11 @@ export class SendResponse { completion_tokens: number; total_tokens: number; }; + compression?: { + input_tokens: number; + saved_tokens: number; + rate: number; + }; constructor( choices: Choice[], @@ -76,10 +81,16 @@ export class SendResponse { prompt_tokens: number; completion_tokens: number; total_tokens: number; + }, + compression?: { + input_tokens: number; + saved_tokens: number; + rate: number; } ) { this.choices = choices; this.usage = usage; + this.compression = compression; } get text(): string | null { @@ -213,10 +224,15 @@ export default class Edgee { prompt_tokens: number; completion_tokens: number; total_tokens: number; - } + }; + compression?: { + input_tokens: number; + saved_tokens: number; + rate: number; + }; }; - return new SendResponse(data.choices, data.usage); + return new SendResponse(data.choices, data.usage, data.compression); } private async *_handleStreamingResponse( From e76dd98d0a2c187b2e699b40db63c561daa33091 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:04:30 +0100 Subject: [PATCH 3/7] test: add compression response field tests - Add test for response with compression field - Add test for response without compression field --- tests/index.test.ts | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/index.test.ts b/tests/index.test.ts index 3b1e646..6ed19d9 100644 --- a/tests/index.test.ts +++ b/tests/index.test.ts @@ -491,6 +491,73 @@ describe('Edgee', () => { ); }); + it('should handle response with compression field', async () => { + const mockResponse: SendResponse = { + choices: [ + { + index: 0, + message: { + role: 'assistant', + content: 'Response', + }, + finish_reason: 'stop', + }, + ], + usage: { + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + }, + compression: { + input_tokens: 100, + saved_tokens: 42, + rate: 0.6102003642987249, + }, + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + const result = await client.send({ + model: 'gpt-4', + input: 'Test', + }); + + expect(result.compression).toBeDefined(); + expect(result.compression?.input_tokens).toBe(100); + expect(result.compression?.saved_tokens).toBe(42); + expect(result.compression?.rate).toBe(0.6102003642987249); + }); + + it('should handle response without compression field', async () => { + const mockResponse: SendResponse = { + choices: [ + { + index: 0, + message: { + role: 'assistant', + content: 'Response', + }, + finish_reason: 'stop', + }, + ], + }; + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => mockResponse, + }); + + const result = await client.send({ + model: 'gpt-4', + input: 'Test', + }); + + expect(result.compression).toBeUndefined(); + }); + it('should throw error when API returns non-OK status', async () => { mockFetch.mockResolvedValueOnce({ ok: false, From 7c7edf32f1c6c7178a62c3ecb073db04c0842511 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:15:45 +0100 Subject: [PATCH 4/7] docs: add compression field documentation to README - Add example showing how to access compression data in responses - Add compression info to features list --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 709dca6..cf0e664 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,17 @@ const response = await edgee.send({ console.log(response.text); // Text content console.log(response.finishReason); // Finish reason console.log(response.toolCalls); // Tool calls (if any) + +// Access usage and compression info +if (response.usage) { + console.log(`Tokens used: ${response.usage.total_tokens}`); +} + +if (response.compression) { + console.log(`Input tokens: ${response.compression.input_tokens}`); + console.log(`Saved tokens: ${response.compression.saved_tokens}`); + console.log(`Compression rate: ${response.compression.rate}`); +} ``` ## Stream Method @@ -67,6 +78,7 @@ for await (const chunk of edgee.stream('gpt-4o', 'Tell me a story')) { - ✅ **Streaming** - Real-time response streaming - ✅ **Tool calling** - Full support for function calling - ✅ **Flexible input** - Accept strings or structured objects +- ✅ **Compression info** - Access token compression metrics in responses - ✅ **Zero dependencies** - Lightweight and fast ## Documentation From 8b92686d525c9cbf7bda4af8c73d4a6f40644aaf Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:23:15 +0100 Subject: [PATCH 5/7] docs: add compression example - Add example showing how to enable compression and set compression rate - Demonstrate accessing compression metrics from response - Show usage information alongside compression data --- example/compression.ts | 64 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 example/compression.ts diff --git a/example/compression.ts b/example/compression.ts new file mode 100644 index 0000000..778e547 --- /dev/null +++ b/example/compression.ts @@ -0,0 +1,64 @@ +/** + * Example: Token compression with Edgee Gateway SDK + * + * This example demonstrates how to: + * 1. Enable compression for a request + * 2. Set a custom compression rate + * 3. Access compression metrics from the response + */ + +import Edgee from "edgee"; + +const edgee = new Edgee(process.env.EDGEE_API_KEY); + +console.log("=".repeat(70)); +console.log("Edgee Token Compression Example"); +console.log("=".repeat(70)); +console.log(); + +// Example: Request with compression enabled +console.log("Example: Request with compression enabled"); +console.log("-".repeat(70)); + +const response = await edgee.send({ + model: "gpt-4o", + input: { + messages: [ + { role: "user", content: "Explain quantum computing in simple terms." }, + ], + enable_compression: true, + compression_rate: 0.5, + }, +}); + +console.log(`Response: ${response.text}`); +console.log(); + +// Display usage information +if (response.usage) { + console.log("Token Usage:"); + console.log(` Prompt tokens: ${response.usage.prompt_tokens}`); + console.log(` Completion tokens: ${response.usage.completion_tokens}`); + console.log(` Total tokens: ${response.usage.total_tokens}`); + console.log(); +} + +// Display compression information +if (response.compression) { + console.log("Compression Metrics:"); + console.log(` Input tokens: ${response.compression.input_tokens}`); + console.log(` Saved tokens: ${response.compression.saved_tokens}`); + console.log( + ` Compression rate: ${(response.compression.rate * 100).toFixed(2)}%` + ); + console.log( + ` Token savings: ${response.compression.saved_tokens} tokens saved!` + ); +} else { + console.log("No compression data available in response."); + console.log("Note: Compression data is only returned when compression is enabled"); + console.log(" and supported by your API key configuration."); +} + +console.log(); +console.log("=".repeat(70)); From ca2dd2a03e5cfcbe881c9a80622c76c9a109ab03 Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:26:22 +0100 Subject: [PATCH 6/7] docs: update compression example with larger input context - Add substantial AI history document as context (~3000+ chars) - Demonstrate meaningful compression on large input - Show percentage of tokens saved - Explain that compression works on input tokens --- example/compression.ts | 83 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/example/compression.ts b/example/compression.ts index 778e547..51d5c8e 100644 --- a/example/compression.ts +++ b/example/compression.ts @@ -2,29 +2,85 @@ * Example: Token compression with Edgee Gateway SDK * * This example demonstrates how to: - * 1. Enable compression for a request + * 1. Enable compression for a request with a large input context * 2. Set a custom compression rate * 3. Access compression metrics from the response + * + * Note: Compression works on INPUT tokens, so this example includes a large + * context document to demonstrate meaningful compression savings. */ import Edgee from "edgee"; const edgee = new Edgee(process.env.EDGEE_API_KEY); +// Large context document to demonstrate input compression +const LARGE_CONTEXT = ` +The History and Impact of Artificial Intelligence + +Artificial intelligence (AI) has evolved from a theoretical concept to a +transformative technology that influences nearly every aspect of modern life. +The field began in earnest in the 1950s when pioneers like Alan Turing and +John McCarthy laid the groundwork for machine intelligence. + +Early developments focused on symbolic reasoning and expert systems. These +rule-based approaches dominated the field through the 1970s and 1980s, with +systems like MYCIN demonstrating practical applications in medical diagnosis. +However, these early systems were limited by their inability to learn from data +and adapt to new situations. + +The resurgence of neural networks in the 1980s and 1990s, particularly with +backpropagation algorithms, opened new possibilities. Yet it wasn't until the +2010s, with the advent of deep learning and the availability of massive datasets +and computational power, that AI truly began to revolutionize industries. + +Modern AI applications span numerous domains: +- Natural language processing enables machines to understand and generate human language +- Computer vision allows machines to interpret visual information from the world +- Robotics combines AI with mechanical systems for autonomous operation +- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment +- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment +- Transportation is being transformed by autonomous vehicles and traffic optimization + +The development of large language models like GPT, BERT, and others has +particularly accelerated progress in natural language understanding and generation. +These models, trained on vast amounts of text data, can perform a wide range of +language tasks with remarkable proficiency. + +Despite remarkable progress, significant challenges remain. Issues of bias, +interpretability, safety, and ethical considerations continue to be areas of +active research and debate. The AI community is working to ensure that these +powerful technologies are developed and deployed responsibly, with consideration +for their societal impact. + +Looking forward, AI is expected to continue advancing rapidly, with potential +breakthroughs in areas like artificial general intelligence, quantum machine +learning, and brain-computer interfaces. The integration of AI into daily life +will likely deepen, raising important questions about human-AI collaboration, +workforce transformation, and the future of human cognition itself. +`; + console.log("=".repeat(70)); console.log("Edgee Token Compression Example"); console.log("=".repeat(70)); console.log(); -// Example: Request with compression enabled -console.log("Example: Request with compression enabled"); +// Example: Request with compression enabled and large input +console.log("Example: Large context with compression enabled"); console.log("-".repeat(70)); +console.log(`Input context length: ${LARGE_CONTEXT.length} characters`); +console.log(); const response = await edgee.send({ model: "gpt-4o", input: { messages: [ - { role: "user", content: "Explain quantum computing in simple terms." }, + { role: "system", content: LARGE_CONTEXT }, + { + role: "user", + content: + "Based on the context above, summarize the key milestones in AI development in 3 bullet points.", + }, ], enable_compression: true, compression_rate: 0.5, @@ -51,14 +107,29 @@ if (response.compression) { console.log( ` Compression rate: ${(response.compression.rate * 100).toFixed(2)}%` ); + const savingsPct = + response.compression.input_tokens > 0 + ? (response.compression.saved_tokens / + response.compression.input_tokens) * + 100 + : 0; + console.log(` Savings: ${savingsPct.toFixed(1)}% of input tokens saved!`); + console.log(); + console.log(` 💡 Without compression, this request would have used`); + console.log(` ${response.compression.input_tokens} input tokens.`); console.log( - ` Token savings: ${response.compression.saved_tokens} tokens saved!` + ` With compression, only ${ + response.compression.input_tokens - response.compression.saved_tokens + } tokens were processed!` ); } else { console.log("No compression data available in response."); - console.log("Note: Compression data is only returned when compression is enabled"); + console.log( + "Note: Compression data is only returned when compression is enabled" + ); console.log(" and supported by your API key configuration."); } console.log(); console.log("=".repeat(70)); + From 118e4595efe9b2adf5e4a3e227e49017d523c70d Mon Sep 17 00:00:00 2001 From: Clement Bouvet Date: Wed, 4 Feb 2026 16:35:13 +0100 Subject: [PATCH 7/7] fix: move large context to user message in compression example - Only USER messages are compressed, not system messages - Update example to put context in user message - Add clarifying comment about compression behavior --- example/compression.ts | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/example/compression.ts b/example/compression.ts index 51d5c8e..1e1af71 100644 --- a/example/compression.ts +++ b/example/compression.ts @@ -6,8 +6,9 @@ * 2. Set a custom compression rate * 3. Access compression metrics from the response * - * Note: Compression works on INPUT tokens, so this example includes a large - * context document to demonstrate meaningful compression savings. + * IMPORTANT: Only USER messages are compressed. System messages are not compressed. + * This example includes a large context in the user message to demonstrate meaningful + * compression savings. */ import Edgee from "edgee"; @@ -66,22 +67,23 @@ console.log("=".repeat(70)); console.log(); // Example: Request with compression enabled and large input -console.log("Example: Large context with compression enabled"); +console.log("Example: Large user message with compression enabled"); console.log("-".repeat(70)); console.log(`Input context length: ${LARGE_CONTEXT.length} characters`); console.log(); +// NOTE: Only USER messages are compressed +// Put the large context in the user message to demonstrate compression +const userMessage = `Here is some context about AI: + +${LARGE_CONTEXT} + +Based on this context, summarize the key milestones in AI development in 3 bullet points.`; + const response = await edgee.send({ model: "gpt-4o", input: { - messages: [ - { role: "system", content: LARGE_CONTEXT }, - { - role: "user", - content: - "Based on the context above, summarize the key milestones in AI development in 3 bullet points.", - }, - ], + messages: [{ role: "user", content: userMessage }], enable_compression: true, compression_rate: 0.5, },