From 1eaf20cc255530c1c1bcde47973c7118a5cf58c5 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Tue, 3 Feb 2026 17:56:43 +0100
Subject: [PATCH 1/7] feat: add optional compression fields to completion
 requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for compression configuration with optional fields that can be
passed per request to override API key settings.

- Add optional `enable_compression?: boolean` field to `InputObject` interface
- Add optional `compression_rate?: number` field to `InputObject` interface
- Include compression fields in request body for both `send()` and `stream()` methods
- Compression fields are omitted from the request body when left `undefined`; explicit values such as `false` or `0` are still sent
- Fields are gateway-internal and not sent to providers

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/index.ts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/index.ts b/src/index.ts
index dc060e8..15b9134 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -43,6 +43,8 @@ export interface InputObject {
   tools?: Tool[];
   tool_choice?: ToolChoice;
   tags?: string[];
+  enable_compression?: boolean; // Enable token compression (gateway-internal, not sent to providers)
+  compression_rate?: number; // Compression rate 0.0-1.0 (gateway-internal, not sent to providers)
 }
 
 export interface SendOptions {
@@ -187,6 +189,8 @@ export default class Edgee {
       if (input.tools) body.tools = input.tools;
       if (input.tool_choice) body.tool_choice = input.tool_choice;
       if (input.tags) body.tags = input.tags;
+      if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression;
+      if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate;
     }
 
     const res = await fetch(`${this.baseUrl}/v1/chat/completions`, {
@@ -288,6 +292,8 @@ export default class Edgee {
       if (input.tools) body.tools = input.tools;
       if (input.tool_choice) body.tool_choice = input.tool_choice;
       if (input.tags) body.tags = input.tags;
+      if (input.enable_compression !== undefined) body.enable_compression = input.enable_compression;
+      if (input.compression_rate !== undefined) body.compression_rate = input.compression_rate;
     }
 
     yield* this._handleStreamingResponse(

From d25e3c5d89486a527c08358ab5323ebf564510e0 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 15:49:13 +0100
Subject: [PATCH 2/7] feat: add compression response field to SendResponse

- Add optional `compression` field to `SendResponse` (inline type with `input_tokens`, `saved_tokens`, and `rate`) for root-level compression data
- Update the `SendResponse` constructor to accept an optional `compression` parameter
- Pass `compression` from the parsed API response through to `SendResponse` in `send()`
---
 src/index.ts | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 15b9134..ee44504 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -69,6 +69,11 @@ export class SendResponse {
     completion_tokens: number;
     total_tokens: number;
   };
+  compression?: {
+    input_tokens: number;
+    saved_tokens: number;
+    rate: number;
+  };
 
   constructor(
     choices: Choice[],
@@ -76,10 +81,16 @@ export class SendResponse {
       prompt_tokens: number;
       completion_tokens: number;
       total_tokens: number;
+    },
+    compression?: {
+      input_tokens: number;
+      saved_tokens: number;
+      rate: number;
     }
   ) {
     this.choices = choices;
     this.usage = usage;
+    this.compression = compression;
   }
 
   get text(): string | null {
@@ -213,10 +224,15 @@ export default class Edgee {
         prompt_tokens: number;
         completion_tokens: number;
         total_tokens: number;
-      }
+      };
+      compression?: {
+        input_tokens: number;
+        saved_tokens: number;
+        rate: number;
+      };
     };
 
-    return new SendResponse(data.choices, data.usage);
+    return new SendResponse(data.choices, data.usage, data.compression);
   }
 
   private async *_handleStreamingResponse(

From e76dd98d0a2c187b2e699b40db63c561daa33091 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:04:30 +0100
Subject: [PATCH 3/7] test: add compression response field tests

- Add test for response with compression field
- Add test for response without compression field
---
 tests/index.test.ts | 67 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/index.test.ts b/tests/index.test.ts
index 3b1e646..6ed19d9 100644
--- a/tests/index.test.ts
+++ b/tests/index.test.ts
@@ -491,6 +491,73 @@ describe('Edgee', () => {
       );
     });
 
+    it('should handle response with compression field', async () => {
+      const mockResponse: SendResponse = {
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: 'Response',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 100,
+          completion_tokens: 50,
+          total_tokens: 150,
+        },
+        compression: {
+          input_tokens: 100,
+          saved_tokens: 42,
+          rate: 0.6102003642987249,
+        },
+      };
+
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => mockResponse,
+      });
+
+      const result = await client.send({
+        model: 'gpt-4',
+        input: 'Test',
+      });
+
+      expect(result.compression).toBeDefined();
+      expect(result.compression?.input_tokens).toBe(100);
+      expect(result.compression?.saved_tokens).toBe(42);
+      expect(result.compression?.rate).toBe(0.6102003642987249);
+    });
+
+    it('should handle response without compression field', async () => {
+      const mockResponse: SendResponse = {
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: 'Response',
+            },
+            finish_reason: 'stop',
+          },
+        ],
+      };
+
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        json: async () => mockResponse,
+      });
+
+      const result = await client.send({
+        model: 'gpt-4',
+        input: 'Test',
+      });
+
+      expect(result.compression).toBeUndefined();
+    });
+
     it('should throw error when API returns non-OK status', async () => {
       mockFetch.mockResolvedValueOnce({
         ok: false,

From 7c7edf32f1c6c7178a62c3ecb073db04c0842511 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:15:45 +0100
Subject: [PATCH 4/7] docs: add compression field documentation to README

- Add example showing how to access compression data in responses
- Add compression info to features list
---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index 709dca6..cf0e664 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,17 @@ const response = await edgee.send({
 console.log(response.text);           // Text content
 console.log(response.finishReason);   // Finish reason
 console.log(response.toolCalls);      // Tool calls (if any)
+
+// Access usage and compression info
+if (response.usage) {
+  console.log(`Tokens used: ${response.usage.total_tokens}`);
+}
+
+if (response.compression) {
+  console.log(`Input tokens: ${response.compression.input_tokens}`);
+  console.log(`Saved tokens: ${response.compression.saved_tokens}`);
+  console.log(`Compression rate: ${response.compression.rate}`);
+}
 ```
 
 ## Stream Method
@@ -67,6 +78,7 @@ for await (const chunk of edgee.stream('gpt-4o', 'Tell me a story')) {
 - ✅ **Streaming** - Real-time response streaming
 - ✅ **Tool calling** - Full support for function calling
 - ✅ **Flexible input** - Accept strings or structured objects
+- ✅ **Compression info** - Access token compression metrics in responses
 - ✅ **Zero dependencies** - Lightweight and fast
 
 ## Documentation

From 8b92686d525c9cbf7bda4af8c73d4a6f40644aaf Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:23:15 +0100
Subject: [PATCH 5/7] docs: add compression example

- Add example showing how to enable compression and set compression rate
- Demonstrate accessing compression metrics from response
- Show usage information alongside compression data
---
 example/compression.ts | 64 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 example/compression.ts

diff --git a/example/compression.ts b/example/compression.ts
new file mode 100644
index 0000000..778e547
--- /dev/null
+++ b/example/compression.ts
@@ -0,0 +1,64 @@
+/**
+ * Example: Token compression with Edgee Gateway SDK
+ *
+ * This example demonstrates how to:
+ * 1. Enable compression for a request
+ * 2. Set a custom compression rate
+ * 3. Access compression metrics from the response
+ */
+
+import Edgee from "edgee";
+
+const edgee = new Edgee(process.env.EDGEE_API_KEY);
+
+console.log("=".repeat(70));
+console.log("Edgee Token Compression Example");
+console.log("=".repeat(70));
+console.log();
+
+// Example: Request with compression enabled
+console.log("Example: Request with compression enabled");
+console.log("-".repeat(70));
+
+const response = await edgee.send({
+  model: "gpt-4o",
+  input: {
+    messages: [
+      { role: "user", content: "Explain quantum computing in simple terms." },
+    ],
+    enable_compression: true,
+    compression_rate: 0.5,
+  },
+});
+
+console.log(`Response: ${response.text}`);
+console.log();
+
+// Display usage information
+if (response.usage) {
+  console.log("Token Usage:");
+  console.log(`  Prompt tokens:     ${response.usage.prompt_tokens}`);
+  console.log(`  Completion tokens: ${response.usage.completion_tokens}`);
+  console.log(`  Total tokens:      ${response.usage.total_tokens}`);
+  console.log();
+}
+
+// Display compression information
+if (response.compression) {
+  console.log("Compression Metrics:");
+  console.log(`  Input tokens:  ${response.compression.input_tokens}`);
+  console.log(`  Saved tokens:  ${response.compression.saved_tokens}`);
+  console.log(
+    `  Compression rate: ${(response.compression.rate * 100).toFixed(2)}%`
+  );
+  console.log(
+    `  Token savings: ${response.compression.saved_tokens} tokens saved!`
+  );
+} else {
+  console.log("No compression data available in response.");
+  console.log("Note: Compression data is only returned when compression is enabled");
+  console.log("      and supported by your API key configuration.");
+}
+
+console.log();
+console.log("=".repeat(70));

From ca2dd2a03e5cfcbe881c9a80622c76c9a109ab03 Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:26:22 +0100
Subject: [PATCH 6/7] docs: update compression example with larger input
 context

- Add substantial AI history document as context (roughly 3,000 characters)
- Demonstrate meaningful compression on large input
- Show percentage of tokens saved
- Explain that compression works on input tokens
---
 example/compression.ts | 83 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 6 deletions(-)

diff --git a/example/compression.ts b/example/compression.ts
index 778e547..51d5c8e 100644
--- a/example/compression.ts
+++ b/example/compression.ts
@@ -2,29 +2,85 @@
  * Example: Token compression with Edgee Gateway SDK
  *
  * This example demonstrates how to:
- * 1. Enable compression for a request
+ * 1. Enable compression for a request with a large input context
  * 2. Set a custom compression rate
  * 3. Access compression metrics from the response
+ *
+ * Note: Compression works on INPUT tokens, so this example includes a large
+ * context document to demonstrate meaningful compression savings.
  */
 
 import Edgee from "edgee";
 
 const edgee = new Edgee(process.env.EDGEE_API_KEY);
 
+// Large context document to demonstrate input compression
+const LARGE_CONTEXT = `
+The History and Impact of Artificial Intelligence
+
+Artificial intelligence (AI) has evolved from a theoretical concept to a 
+transformative technology that influences nearly every aspect of modern life. 
+The field began in earnest in the 1950s when pioneers like Alan Turing and 
+John McCarthy laid the groundwork for machine intelligence.
+
+Early developments focused on symbolic reasoning and expert systems. These 
+rule-based approaches dominated the field through the 1970s and 1980s, with 
+systems like MYCIN demonstrating practical applications in medical diagnosis. 
+However, these early systems were limited by their inability to learn from data 
+and adapt to new situations.
+
+The resurgence of neural networks in the 1980s and 1990s, particularly with 
+backpropagation algorithms, opened new possibilities. Yet it wasn't until the 
+2010s, with the advent of deep learning and the availability of massive datasets 
+and computational power, that AI truly began to revolutionize industries.
+
+Modern AI applications span numerous domains:
+- Natural language processing enables machines to understand and generate human language
+- Computer vision allows machines to interpret visual information from the world
+- Robotics combines AI with mechanical systems for autonomous operation
+- Healthcare uses AI for diagnosis, drug discovery, and personalized treatment
+- Finance leverages AI for fraud detection, algorithmic trading, and risk assessment
+- Transportation is being transformed by autonomous vehicles and traffic optimization
+
+The development of large language models like GPT, BERT, and others has 
+particularly accelerated progress in natural language understanding and generation. 
+These models, trained on vast amounts of text data, can perform a wide range of 
+language tasks with remarkable proficiency.
+
+Despite remarkable progress, significant challenges remain. Issues of bias, 
+interpretability, safety, and ethical considerations continue to be areas of 
+active research and debate. The AI community is working to ensure that these 
+powerful technologies are developed and deployed responsibly, with consideration 
+for their societal impact.
+
+Looking forward, AI is expected to continue advancing rapidly, with potential 
+breakthroughs in areas like artificial general intelligence, quantum machine 
+learning, and brain-computer interfaces. The integration of AI into daily life 
+will likely deepen, raising important questions about human-AI collaboration, 
+workforce transformation, and the future of human cognition itself.
+`;
+
 console.log("=".repeat(70));
 console.log("Edgee Token Compression Example");
 console.log("=".repeat(70));
 console.log();
 
-// Example: Request with compression enabled
-console.log("Example: Request with compression enabled");
+// Example: Request with compression enabled and large input
+console.log("Example: Large context with compression enabled");
 console.log("-".repeat(70));
+console.log(`Input context length: ${LARGE_CONTEXT.length} characters`);
+console.log();
 
 const response = await edgee.send({
   model: "gpt-4o",
   input: {
     messages: [
-      { role: "user", content: "Explain quantum computing in simple terms." },
+      { role: "system", content: LARGE_CONTEXT },
+      {
+        role: "user",
+        content:
+          "Based on the context above, summarize the key milestones in AI development in 3 bullet points.",
+      },
     ],
     enable_compression: true,
     compression_rate: 0.5,
@@ -51,14 +107,29 @@ if (response.compression) {
   console.log(
     `  Compression rate: ${(response.compression.rate * 100).toFixed(2)}%`
   );
+  const savingsPct =
+    response.compression.input_tokens > 0
+      ? (response.compression.saved_tokens /
+          response.compression.input_tokens) *
+        100
+      : 0;
+  console.log(`  Savings: ${savingsPct.toFixed(1)}% of input tokens saved!`);
+  console.log();
+  console.log(`  💡 Without compression, this request would have used`);
+  console.log(`     ${response.compression.input_tokens} input tokens.`);
   console.log(
-    `  Token savings: ${response.compression.saved_tokens} tokens saved!`
+    `     With compression, only ${
+      response.compression.input_tokens - response.compression.saved_tokens
+    } tokens were processed!`
   );
 } else {
   console.log("No compression data available in response.");
-  console.log("Note: Compression data is only returned when compression is enabled");
+  console.log(
+    "Note: Compression data is only returned when compression is enabled"
+  );
   console.log("      and supported by your API key configuration.");
 }
 
 console.log();
 console.log("=".repeat(70));
+

From 118e4595efe9b2adf5e4a3e227e49017d523c70d Mon Sep 17 00:00:00 2001
From: Clement Bouvet <clement@cbouvet.fr>
Date: Wed, 4 Feb 2026 16:35:13 +0100
Subject: [PATCH 7/7] fix: move large context to user message in compression
 example

- Only USER messages are compressed, not system messages
- Update example to put context in user message
- Add clarifying comment about compression behavior
---
 example/compression.ts | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/example/compression.ts b/example/compression.ts
index 51d5c8e..1e1af71 100644
--- a/example/compression.ts
+++ b/example/compression.ts
@@ -6,8 +6,9 @@
  * 2. Set a custom compression rate
  * 3. Access compression metrics from the response
  *
- * Note: Compression works on INPUT tokens, so this example includes a large
- * context document to demonstrate meaningful compression savings.
+ * IMPORTANT: Only USER messages are compressed. System messages are not compressed.
+ * This example includes a large context in the user message to demonstrate meaningful
+ * compression savings.
  */
 
 import Edgee from "edgee";
@@ -66,22 +67,23 @@ console.log("=".repeat(70));
 console.log();
 
 // Example: Request with compression enabled and large input
-console.log("Example: Large context with compression enabled");
+console.log("Example: Large user message with compression enabled");
 console.log("-".repeat(70));
 console.log(`Input context length: ${LARGE_CONTEXT.length} characters`);
 console.log();
 
+// NOTE: Only USER messages are compressed
+// Put the large context in the user message to demonstrate compression
+const userMessage = `Here is some context about AI:
+
+${LARGE_CONTEXT}
+
+Based on this context, summarize the key milestones in AI development in 3 bullet points.`;
+
 const response = await edgee.send({
   model: "gpt-4o",
   input: {
-    messages: [
-      { role: "system", content: LARGE_CONTEXT },
-      {
-        role: "user",
-        content:
-          "Based on the context above, summarize the key milestones in AI development in 3 bullet points.",
-      },
-    ],
+    messages: [{ role: "user", content: userMessage }],
     enable_compression: true,
     compression_rate: 0.5,
   },