diff --git a/features/token-compression.mdx b/features/token-compression.mdx index 9eeb1f9..52c0b18 100644 --- a/features/token-compression.mdx +++ b/features/token-compression.mdx @@ -40,6 +40,32 @@ Token compression happens automatically on every request through a four-step pro Compression is most effective for prompts with repeated context (RAG), long system instructions, or verbose multi-turn histories. Simple queries may see minimal compression. +## Understanding compression ratio + +The **compression ratio** (sometimes called *compression rate* in APIs) is **compressed size ÷ original size**: how large the compressed prompt is relative to the original. + +- **0.9** (Light) = compressed prompt is 90% of the original length → **~10% fewer tokens** +- **0.7** (Strong) = compressed prompt is 70% of the original → **~30% fewer tokens** (more aggressive) + +In the console you choose **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)**. The compressor aims for that ratio; the actual ratio per request may vary. Strong (0.7) asks for more compression; Light (0.9) is more conservative and keeps more of the original text. + + + **Ratio vs reduction:** Ratio = compressed/original (e.g. 0.75). Reduction = 1 − ratio (e.g. 25%). When we say "50% reduction," that corresponds to a ratio of 0.50; likewise, a "30% reduction" corresponds to a ratio of 0.70. + + +## Semantic preservation and BERT score + +To avoid changing the meaning of the prompt, we compare the compressed text to the original using **BERT score** (F1). It measures how semantically similar the two texts are on a scale of 0–1 (0%–100%). + +- **Semantic preservation threshold** (0–100%) is the *minimum* similarity we require. If the BERT score is **below** this threshold, we **do not** use the compressed prompt—we send the original instead, so quality is preserved. +- In the console you choose **Off** (no check), **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)**. 
Off = we always use the compressed prompt when compression runs; a higher threshold is stricter: we use the compressed prompt only when its BERT score is at or above the threshold, and otherwise fall back to the original. + +This way you can allow aggressive compression (low ratio) while still guaranteeing that we never send a compressed prompt that is too different from what the user wrote. + + + In the Activity table, when we fell back to the original prompt because the similarity was below the threshold, the input token count is shown in red with a tooltip: "Didn't match the semantic threshold – original prompt was used." + + ## Enabling Token Compression Token compression can be enabled in three ways, giving you flexibility to control compression at the request, API key, or organization level: @@ -58,7 +84,7 @@ Enable compression for specific requests using the SDK: {"role": "user", "content": "Your prompt here"} ], "enable_compression": true, - "compression_rate": 0.8 // Target 80% compression (optional) + "compression_rate": 0.8 // Target ratio: compressed = 80% of original (optional) } }); ``` @@ -73,7 +99,7 @@ Enable compression for specific requests using the SDK: {"role": "user", "content": "Your prompt here"} ], "enable_compression": True, - "compression_rate": 0.8 # Target 80% compression (optional) + "compression_rate": 0.8 # Target ratio: compressed = 80% of original (optional) } ) ``` @@ -86,7 +112,7 @@ Enable compression for specific requests using the SDK: {Role: "user", Content: "Your prompt here"}, }, EnableCompression: true, - CompressionRate: 0.8, // Target 80% compression (optional) + CompressionRate: 0.8, // Target ratio: compressed = 80% of original (optional) }) ``` @@ -95,7 +121,7 @@ Enable compression for specific requests using the SDK: ```rust let input = InputObject::new(vec![Message::user("Your prompt here")]) .with_compression(true) - .with_compression_rate(0.8); // Target 80% compression (optional) + .with_compression_rate(0.8); // Target ratio: compressed = 
80% of original (optional) let response = client.send("gpt-4o", input).await?; ``` @@ -111,11 +137,12 @@ Enable compression for specific API keys in your organization settings. This is Enable compression for specific API keys -In the **Tools** section of your console: +In the **Edge Models** section of your console: 1. Toggle **Enable token compression** on -2. Set your target **Compression rate** (0.7-0.9, default 0.75) -3. Under **Scope**, select **Apply to specific API keys** -4. Choose which API keys should use compression +2. Set **Compression** to **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)** — see [Understanding compression ratio](#understanding-compression-ratio) +3. Set **Semantic preservation threshold** to **Off**, **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)** — see [Semantic preservation and BERT score](#semantic-preservation-and-bert-score) +4. Under **Scope**, select **Apply to specific API keys** +5. Choose which API keys should use compression ### 3. Organization-Wide (Console) @@ -126,14 +153,15 @@ Enable compression for all requests across your entire organization. This is the Enable compression organization-wide -In the **Tools** section of your console: +In the **Edge Models** section of your console: 1. Toggle **Enable token compression** on -2. Set your target **Compression rate** (0.7-0.9, default 0.75) -3. Under **Scope**, select **Apply to all org requests** -4. All API keys will now use compression by default +2. Set **Compression** to **Light (0.9)**, **Medium (0.8)**, or **Strong (0.7)** +3. Set **Semantic preservation threshold** to **Off**, **Ultra Safe (0.95)**, **Safe (0.85)**, or **Edgy (0.75)** +4. Under **Scope**, select **Apply to all org requests** +5. All API keys will now use compression by default - **Compression rate** controls how aggressively Edgee compresses prompts. A higher rate (e.g., 0.9) attempts more compression but may be less effective, while a lower rate (e.g., 0.7) is more conservative. 
The default of 0.75 provides a good balance for most use cases. + **Compression** controls how aggressively Edgee compresses prompts: **Strong (0.7)** aims for more compression; **Light (0.9)** is more conservative. **Medium (0.8)** is the default. See [Understanding compression ratio](#understanding-compression-ratio). @@ -190,7 +218,7 @@ const response = await edgee.send({ model: 'gpt-4o', input: `Answer the question based on these documents:\n\n${documents.join('\n\n')}\n\nQuestion: What is the main topic?`, enable_compression: true, // Enable compression for this request - compression_rate: 0.8, // Target compression ratio (0-1, e.g., 0.8 = 80%) + compression_rate: 0.8, // Target ratio (0-1): 0.8 = compressed is 80% of original }); console.log(response.text); @@ -200,7 +228,7 @@ if (response.compression) { console.log(`Original tokens: ${response.compression.input_tokens}`); console.log(`Compressed tokens: ${response.usage.prompt_tokens}`); console.log(`Tokens saved: ${response.compression.saved_tokens}`); - console.log(`Compression rate: ${(response.compression.rate * 100).toFixed(1)}%`); + console.log(`Compression ratio: ${(response.compression.rate * 100).toFixed(1)}% (compressed/original)`); } ``` @@ -272,7 +300,7 @@ response.usage.total_tokens // Total for billing calculation // Compression information (when applied) response.compression.input_tokens // Original token count (before compression) response.compression.saved_tokens // Tokens saved by compression -response.compression.rate // Compression rate (0-1, e.g., 0.61 = 61%) +response.compression.rate // Compression ratio (0-1, e.g., 0.61 = compressed is 61% of original) ``` Use these fields to: diff --git a/images/compression-enabled-by-tag-dark.png b/images/compression-enabled-by-tag-dark.png index 5687134..71c5485 100644 Binary files a/images/compression-enabled-by-tag-dark.png and b/images/compression-enabled-by-tag-dark.png differ diff --git a/images/compression-enabled-by-tag-light.png 
b/images/compression-enabled-by-tag-light.png index 3ed6f53..f300b3d 100644 Binary files a/images/compression-enabled-by-tag-light.png and b/images/compression-enabled-by-tag-light.png differ diff --git a/images/compression-enabled-org-dark.png b/images/compression-enabled-org-dark.png index 957011f..20eb15a 100644 Binary files a/images/compression-enabled-org-dark.png and b/images/compression-enabled-org-dark.png differ diff --git a/images/compression-enabled-org-light.png b/images/compression-enabled-org-light.png index 7a1e36a..3d5ee27 100644 Binary files a/images/compression-enabled-org-light.png and b/images/compression-enabled-org-light.png differ diff --git a/package-lock.json b/package-lock.json index 70f5c9b..8c577ba 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "mintlify": "^4.2.334" + "mintlify": "^4.2.336" } }, "node_modules/@alcalzone/ansi-tokenize": { @@ -982,18 +982,18 @@ } }, "node_modules/@mintlify/cli": { - "version": "4.0.938", - "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.938.tgz", - "integrity": "sha512-3535UYtPNQ0W/sjr1/ZKwvxBA4bRLp5OKZsmUx6YFe1hmVdI4UHV3ywVC2gvUjOAmp7d9F1G6Slu5hxkKGJP1w==", + "version": "4.0.940", + "resolved": "https://registry.npmjs.org/@mintlify/cli/-/cli-4.0.940.tgz", + "integrity": "sha512-8WNRfIde+PdL+TgesAmmpd1W8eLXgePEr4ioKw1R5tYbEtzOO8OBJ8GoNhV7Y2g7k8VZV7+1H562RG20PQYkew==", "license": "Elastic-2.0", "dependencies": { "@inquirer/prompts": "7.9.0", - "@mintlify/common": "1.0.715", - "@mintlify/link-rot": "3.0.875", + "@mintlify/common": "1.0.717", + "@mintlify/link-rot": "3.0.877", "@mintlify/models": "0.0.269", - "@mintlify/prebuild": "1.0.851", - "@mintlify/previewing": "4.0.908", - "@mintlify/validation": "0.1.586", + "@mintlify/prebuild": "1.0.853", + "@mintlify/previewing": "4.0.910", + "@mintlify/validation": "0.1.587", "adm-zip": "0.5.16", "chalk": "5.2.0", "color": "4.2.3", @@ -1018,9 +1018,9 @@ } }, 
"node_modules/@mintlify/common": { - "version": "1.0.715", - "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.715.tgz", - "integrity": "sha512-23atHBnjre0pXNPJmr/bEz7ZWkTtQplefXG+BsKraFWDThBBKzyWhZAIDY/Z1cr/Jeu/sTL7G5UQ67Wj6r5LIw==", + "version": "1.0.717", + "resolved": "https://registry.npmjs.org/@mintlify/common/-/common-1.0.717.tgz", + "integrity": "sha512-wk/8/1/RLpfAv9EPjwtbEznZiXbryn9rhvzPUxHXeHsTMTdva0QrKo1dB6BL9bI9CaxnYYl7YFdNHUNSOHE+xw==", "license": "ISC", "dependencies": { "@asyncapi/parser": "3.4.0", @@ -1028,7 +1028,7 @@ "@mintlify/mdx": "^3.0.4", "@mintlify/models": "0.0.269", "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/validation": "0.1.586", + "@mintlify/validation": "0.1.587", "@sindresorhus/slugify": "2.2.0", "@types/mdast": "4.0.4", "acorn": "8.11.2", @@ -1457,16 +1457,16 @@ } }, "node_modules/@mintlify/link-rot": { - "version": "3.0.875", - "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.875.tgz", - "integrity": "sha512-38QHad7n0YsAOW0HFpX1yzo+sK43iqaUe0Mf7h2F9NalG/pfmAcl4R0rP9x9/DSkC994RbV8ZuvWbgPiKNBXCw==", + "version": "3.0.877", + "resolved": "https://registry.npmjs.org/@mintlify/link-rot/-/link-rot-3.0.877.tgz", + "integrity": "sha512-geo4aNmtCjP7rsHPj6sO+EbVw8UM2P7aOyHBsttcJm+zsNW1PbyewwH8LaAJGQfwqgY/RYjQAS64rHJO26V6ZQ==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.715", - "@mintlify/prebuild": "1.0.851", - "@mintlify/previewing": "4.0.908", + "@mintlify/common": "1.0.717", + "@mintlify/prebuild": "1.0.853", + "@mintlify/previewing": "4.0.910", "@mintlify/scraping": "4.0.522", - "@mintlify/validation": "0.1.586", + "@mintlify/validation": "0.1.587", "fs-extra": "11.1.0", "unist-util-visit": "4.1.2" }, @@ -1584,15 +1584,15 @@ } }, "node_modules/@mintlify/prebuild": { - "version": "1.0.851", - "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.851.tgz", - "integrity": 
"sha512-bSrgZiQRduxwjmHF/u9V6ROPdG9Qbc/YVDrgqC/dZyT8F+V/NIhzUVkOfkSBT4BHg9B/IoQXuBOL0KjayZR2MA==", + "version": "1.0.853", + "resolved": "https://registry.npmjs.org/@mintlify/prebuild/-/prebuild-1.0.853.tgz", + "integrity": "sha512-WpY8gUi4ALYagFWgQfbIaeS+9WhFPy0D2mjbCjdi3l2KL39Fynz7pOEsGCNdNX0oW4JwFHkC2hF7U4FyKM/RXA==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.715", + "@mintlify/common": "1.0.717", "@mintlify/openapi-parser": "^0.0.8", - "@mintlify/scraping": "4.0.576", - "@mintlify/validation": "0.1.586", + "@mintlify/scraping": "4.0.578", + "@mintlify/validation": "0.1.587", "chalk": "5.3.0", "favicons": "7.2.0", "front-matter": "4.0.2", @@ -1606,12 +1606,12 @@ } }, "node_modules/@mintlify/prebuild/node_modules/@mintlify/scraping": { - "version": "4.0.576", - "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.576.tgz", - "integrity": "sha512-5epw+n1DPiFmk7mw3Nmh8lrc/pClXD5fFKdkpEfJSggbFFY4v5tz935RqUTTwHTvelb+VZCaYN6tai2nZBFezQ==", + "version": "4.0.578", + "resolved": "https://registry.npmjs.org/@mintlify/scraping/-/scraping-4.0.578.tgz", + "integrity": "sha512-Rk0OC9MRhJ6M5uIRn7F62zs1oF41d/aVuGc8+/Ez4PeGIQ1mtyIVE9UIbjjYTWdmjTNUYNEimKLhPP8LdFU12Q==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/common": "1.0.715", + "@mintlify/common": "1.0.717", "@mintlify/openapi-parser": "^0.0.8", "fs-extra": "11.1.1", "hast-util-to-mdast": "10.1.0", @@ -1787,14 +1787,14 @@ } }, "node_modules/@mintlify/previewing": { - "version": "4.0.908", - "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.908.tgz", - "integrity": "sha512-JjJFHXgBi1E6elKqq5tzqpUd3sAdRhLZEfY/2hjEpzPcSCeUjf6biCNfTzi/+Qi/0nktthB4BIFY5MkXswdJyw==", + "version": "4.0.910", + "resolved": "https://registry.npmjs.org/@mintlify/previewing/-/previewing-4.0.910.tgz", + "integrity": "sha512-S7NQKRSSSSsv0W06ryW0l5jkdUwGntbvHuFuFB3oybNRrC7cQwRFuuFZ7CKONI+2SkfpZ7ZyiwSHLZfEW5nBdw==", "license": "Elastic-2.0", "dependencies": { - 
"@mintlify/common": "1.0.715", - "@mintlify/prebuild": "1.0.851", - "@mintlify/validation": "0.1.586", + "@mintlify/common": "1.0.717", + "@mintlify/prebuild": "1.0.853", + "@mintlify/validation": "0.1.587", "better-opn": "3.0.2", "chalk": "5.2.0", "chokidar": "3.5.3", @@ -2434,9 +2434,9 @@ } }, "node_modules/@mintlify/validation": { - "version": "0.1.586", - "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.586.tgz", - "integrity": "sha512-db8axJVnysJfSk16vSBMWKp8+Vareox1U6qxX8JoQRrONrgIHgkCS0wlo0LQwEdBkK8S2mX5xhVaLpktM3hm5g==", + "version": "0.1.587", + "resolved": "https://registry.npmjs.org/@mintlify/validation/-/validation-0.1.587.tgz", + "integrity": "sha512-N8dJqjEOAxvb9lh+byJBLJxixWx3fL/StgnHbzUmOUUb4mSgG6ioMNkOwVdaIu9hxw9qW40gW+31Dy4YogeGgA==", "license": "Elastic-2.0", "dependencies": { "@mintlify/mdx": "^3.0.4", @@ -9558,12 +9558,12 @@ } }, "node_modules/mintlify": { - "version": "4.2.334", - "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.334.tgz", - "integrity": "sha512-Qly17PezvCGxPXb7cvtZ4LK5pSjoQ1iOK0ylheYT/EZ/puQGQkehQGG7wALUnGMIDPXo640gzYJsFocd21ECkg==", + "version": "4.2.336", + "resolved": "https://registry.npmjs.org/mintlify/-/mintlify-4.2.336.tgz", + "integrity": "sha512-HE/wmGkCLHaPkkR/0M+bsiuckkrmCS0bTOObtsp3HHTY/bmcWHlVmAyHSkdbfNxPiItZfmBW0JaQJtXjZqx3yA==", "license": "Elastic-2.0", "dependencies": { - "@mintlify/cli": "4.0.938" + "@mintlify/cli": "4.0.940" }, "bin": { "mint": "index.js", diff --git a/package.json b/package.json index a0a001a..0bb9291 100644 --- a/package.json +++ b/package.json @@ -4,6 +4,6 @@ "links": "mintlify broken-links" }, "dependencies": { - "mintlify": "^4.2.334" + "mintlify": "^4.2.336" } }