diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 70e1e1257..158851022 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -89,6 +89,7 @@ jobs: deno-version: vx.x.x - name: E2E test - deno import from npm working-directory: e2e/deno - run: deno run --allow-net index.ts + run: deno run --allow-net --allow-env=HF_TOKEN index.ts env: NPM_CONFIG_REGISTRY: http://localhost:4874/ + HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/.vscode/settings.json b/.vscode/settings.json index 072ae9648..0d052137b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,5 +11,7 @@ "json.format.enable": false, "[json]": { "editor.defaultFormatter": "esbenp.prettier-vscode" - } + }, + "cSpell.words": ["huggingface"], + "deno.enablePaths": ["./e2e/deno"] } diff --git a/README.md b/README.md index 2d8adecd2..7dae039d1 100644 --- a/README.md +++ b/README.md @@ -85,8 +85,8 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or ```html ``` diff --git a/e2e/deno/index.ts b/e2e/deno/index.ts index 92deb71b5..04cfab66a 100644 --- a/e2e/deno/index.ts +++ b/e2e/deno/index.ts @@ -1,7 +1,12 @@ import { HfInference } from "npm:@huggingface/inference@*"; import { whoAmI, listFiles } from "npm:@huggingface/hub@*"; -const hf = new HfInference(); +const token = Deno.env.get("HF_TOKEN"); + +if (!token) { + console.error("Please set the HF_TOKEN environment variable."); + Deno.exit(1); +} const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" }); console.log(info); @@ -10,6 +15,11 @@ for await (const file of listFiles({ credentials: { accessToken: "hf_hub.js" }, console.log(file); } +const hf = new HfInference(token); + +const tokenInfo = await whoAmI({ credentials: { accessToken: token } }); +console.log(tokenInfo); + const sum = await hf.summarization({ model: "google/pegasus-xsum", inputs: diff --git a/e2e/ts/.gitignore b/e2e/ts/.gitignore new file mode 100644 index 000000000..483a9c42c --- /dev/null +++ b/e2e/ts/.gitignore @@ -0,0 +1 @@ +package-lock.json \ No newline at end of file diff --git a/packages/gguf/package.json b/packages/gguf/package.json index 5a047eae8..baf5acc18 100644 --- a/packages/gguf/package.json +++ b/packages/gguf/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/gguf", "packageManager": "pnpm@8.10.5", - "version": "0.0.10", + "version": "0.1.2", "description": "a GGUF parser that works on remotely hosted files", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { @@ -47,5 +47,7 @@ ], "author": "Hugging Face", "license": "MIT", - "devDependencies": {} + "devDependencies": { + "type-fest": "^3.9.0" + } } diff --git a/packages/gguf/pnpm-lock.yaml b/packages/gguf/pnpm-lock.yaml index 2b9f1883a..c2f96863a 100644 --- a/packages/gguf/pnpm-lock.yaml +++ b/packages/gguf/pnpm-lock.yaml @@ -1,5 +1,17 @@ lockfileVersion: '6.0' +devDependencies: + type-fest: + specifier: ^3.9.0 + version: 3.13.1 + +packages: + + /type-fest@3.13.1: + resolution: {integrity: sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==} + engines: {node: '>=14.16'} + dev: true + settings: autoInstallPeers: true excludeLinksFromLockfile: false diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 13da655ee..2e6f2c21a 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { GGMLQuantizationType, gguf 
} from "./gguf"; +import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf"; const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf"; const URL_MISTRAL_7B = @@ -9,6 +9,8 @@ const URL_BIG_ENDIAN = "https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf"; const URL_V1 = "https://huggingface.co/tmadge/testing/resolve/66c078028d1ff92d7a9264a1590bc61ba6437933/tinyllamas-stories-260k-f32.gguf"; +const URL_SHARDED_GROK = + "https://huggingface.co/Arki05/Grok-1-GGUF/resolve/ecafa8d8eca9b8cd75d11a0d08d3a6199dc5a068/grok-1-IQ3_XS-split-00001-of-00009.gguf"; describe("gguf", () => { it("should parse a llama2 7b", async () => { @@ -220,4 +222,23 @@ describe("gguf", () => { dtype: GGMLQuantizationType.F32, }); }); + + it("should detect sharded gguf filename", async () => { + const ggufPath = "grok-1/grok-1-q4_0-00003-of-00009.gguf"; // https://huggingface.co/ggml-org/models/blob/fcf344adb9686474c70e74dd5e55465e9e6176ef/grok-1/grok-1-q4_0-00003-of-00009.gguf + const ggufShardFileInfo = parseGgufShardFilename(ggufPath); + + expect(ggufShardFileInfo?.prefix).toEqual("grok-1/grok-1-q4_0"); + expect(ggufShardFileInfo?.shard).toEqual("00003"); + expect(ggufShardFileInfo?.total).toEqual("00009"); + }); + + it("should get param count for llama2 7b", async () => { + const { parameterCount } = await gguf(URL_LLAMA, { computeParametersCount: true }); + expect(parameterCount).toEqual(6_738_415_616); // 7B + }); + + it("should get param count for sharded gguf", async () => { + const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK); + expect(parameterCount).toEqual(316_490_127_360); // 316B + }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 5cb46f7ae..6e1527bd6 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -1,10 +1,31 @@ import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; import { GGUFValueType } from "./types"; +import { promisesQueue } from "./utils/promisesQueue"; export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; export { GGUFValueType, GGMLQuantizationType } from "./types"; +export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions"; export const RE_GGUF_FILE = /\.gguf$/; +export const RE_GGUF_SHARD_FILE = /^(?.*?)-(?\d{5})-of-(?\d{5})\.gguf$/; + +export interface GgufShardFileInfo { + prefix: string; + shard: string; + total: string; +} + +export function parseGgufShardFilename(filename: string): GgufShardFileInfo | null { + const match = RE_GGUF_SHARD_FILE.exec(filename); + if (match && match.groups) { + return { + prefix: match.groups["prefix"], + shard: match.groups["shard"], + total: match.groups["total"], + }; + } + return null; +} const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; @@ -30,8 +51,11 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB class RangeView { private chunk: number; private buffer: ArrayBuffer; + private dataView: DataView; - readonly view: DataView; + get view(): DataView { + return this.dataView; + } constructor( public url: string, @@ -47,7 +71,7 @@ class RangeView { // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); - this.view = new DataView(this.buffer); + this.dataView = new 
DataView(this.buffer); } /** * Fetch a new chunk from the server @@ -63,18 +87,40 @@ class RangeView { }) ).arrayBuffer() ); + this.appendBuffer(buf); + this.chunk += 1; + } + /** + * Append new data into the buffer + */ + appendBuffer(buf: Uint8Array) { /// TODO(fix typing) // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-ignore - this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE); - new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE); - this.chunk += 1; + if (ArrayBuffer.prototype.resize) { + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE); + new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE); + } else { + // If the browser does not support ArrayBuffer.resize, we fallback to this polyfill version + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + const newBuffer = new ArrayBuffer((this.chunk + 1) * HTTP_CHUNK_SIZE, { maxByteLength: HTTP_TOTAL_MAX_SIZE }); + const arrView = new Uint8Array(newBuffer); + arrView.set(new Uint8Array(this.buffer)); + arrView.set(buf, this.chunk * HTTP_CHUNK_SIZE); + this.buffer = newBuffer; + this.dataView = new DataView(this.buffer); + } } /** * Check whether we need to fetch a new chunk */ async fetchChunkIfNeeded(offset: number) { - if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) { + if (this.dataView.byteLength - offset < HTTP_DATA_LEEWAY) { await this.fetchChunk(); } } @@ -156,6 +202,16 @@ function readMetadataValue( } } +export async function gguf( + url: string, + params: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. + */ + fetch?: typeof fetch; + computeParametersCount: true; + } +): Promise; export async function gguf( url: string, params?: { @@ -164,7 +220,17 @@ export async function gguf( */ fetch?: typeof fetch; } -): Promise { +): Promise; +export async function gguf( + url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. + */ + fetch?: typeof fetch; + computeParametersCount?: boolean; + } +): Promise { const r = new RangeView(url, params); await r.fetchChunk(); @@ -273,5 +339,47 @@ export async function gguf( }); } - return { metadata, tensorInfos }; + if (params?.computeParametersCount) { + const parameterCount = tensorInfos + .map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1)) + .reduce((acc, val) => acc + val, 0); + + return { metadata, tensorInfos, parameterCount }; + } else { + return { metadata, tensorInfos }; + } +} + +export async function ggufAllShards( + url: string, + params?: { + /** + * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers. 
+ */ + fetch?: typeof fetch; + } +): Promise<{ shards: GGUFParseOutput[]; parameterCount: number }> { + const ggufShardFileInfo = parseGgufShardFilename(url); + if (ggufShardFileInfo) { + const total = parseInt(ggufShardFileInfo.total); + const prefix = ggufShardFileInfo.prefix; + + const urls: string[] = []; + for (let shardIdx = 1; shardIdx <= total; shardIdx++) { + urls.push(`${prefix}-${shardIdx.toString().padStart(5, "0")}-of-${total.toString().padStart(5, "0")}.gguf`); + } + + const PARALLEL_DOWNLOADS = 20; + const shards = await promisesQueue( + urls.map((shardUrl) => () => gguf(shardUrl, { computeParametersCount: true })), + PARALLEL_DOWNLOADS + ); + return { + shards, + parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0), + }; + } else { + const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true }); + return { shards: [{ metadata, tensorInfos }], parameterCount }; + } } diff --git a/packages/gguf/src/quant-descriptions.ts b/packages/gguf/src/quant-descriptions.ts new file mode 100644 index 000000000..8a0e93e35 --- /dev/null +++ b/packages/gguf/src/quant-descriptions.ts @@ -0,0 +1,99 @@ +import { GGMLQuantizationType } from "./types"; + +export const GGUF_QUANT_DESCRIPTIONS: Record = { + [GGMLQuantizationType.F32]: { + txt: "32-bit standard IEEE 754 single-precision floating-point number.", + src_url: "https://en.wikipedia.org/wiki/Single-precision_floating-point_format", + }, + [GGMLQuantizationType.F16]: { + txt: "16-bit standard IEEE 754 half-precision floating-point number.", + src_url: "https://en.wikipedia.org/wiki/Half-precision_floating-point_format", + }, + [GGMLQuantizationType.Q8_0]: { + txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249", + }, + [GGMLQuantizationType.Q8_1]: { + txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290", + }, + [GGMLQuantizationType.Q8_K]: { + txt: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.Q6_K]: { + txt: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.Q5_0]: { + txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249", + }, + [GGMLQuantizationType.Q5_1]: { + txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. 
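The hunks above add `parseGgufShardFilename`, a `gguf()` overload with `computeParametersCount`, and `ggufAllShards`. A minimal usage sketch follows; the filenames, URLs, and expected values are taken from the new spec tests, and the import path assumes the package root re-exports these symbols as `src/gguf.ts` does:

```ts
import { gguf, ggufAllShards, parseGgufShardFilename } from "@huggingface/gguf";

// Filename parsing, mirroring the new spec test
const shardInfo = parseGgufShardFilename("grok-1/grok-1-q4_0-00003-of-00009.gguf");
// => { prefix: "grok-1/grok-1-q4_0", shard: "00003", total: "00009" }

// Parameter count for a single file (URL_LLAMA from the spec)
const { parameterCount } = await gguf(
  "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf",
  { computeParametersCount: true }
);
console.log(parameterCount); // 6_738_415_616 per the new test

// Aggregate metadata and parameter count across all shards (URL_SHARDED_GROK from the spec)
const { shards, parameterCount: totalParams } = await ggufAllShards(
  "https://huggingface.co/Arki05/Grok-1-GGUF/resolve/ecafa8d8eca9b8cd75d11a0d08d3a6199dc5a068/grok-1-IQ3_XS-split-00001-of-00009.gguf"
);
console.log(shards.length, totalParams); // 9 shards, 316_490_127_360 per the new test
```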
Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290", + }, + [GGMLQuantizationType.Q5_K]: { + txt: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.Q4_0]: { + txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249", + }, + [GGMLQuantizationType.Q4_1]: { + txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not used widely as of today).", + src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290", + }, + [GGMLQuantizationType.Q4_K]: { + txt: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.Q3_K]: { + txt: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting. 3.4375 bits-per-weight.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.Q2_K]: { + txt: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weight. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`, + src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305", + }, + [GGMLQuantizationType.IQ4_XS]: { + txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 4.25 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ3_S]: { + txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.44 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ3_XXS]: { + txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.06 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ2_S]: { + txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.5 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ2_XS]: { + txt: "2-bit quantization (q). Super-blocks with 256 weights. 
Weight w is obtained using super_block_scale & importance matrix, resulting in 2.31 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ2_XXS]: { + txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.06 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ1_S]: { + txt: "1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.56 bits-per-weight.", + src_url: + "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70", + }, + [GGMLQuantizationType.IQ4_NL]: { + txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix.", + src_url: "https://github.com/ggerganov/llama.cpp/pull/5590", + }, +}; diff --git a/packages/gguf/src/utils/promisesQueue.ts b/packages/gguf/src/utils/promisesQueue.ts new file mode 100644 index 000000000..35d2d0690 --- /dev/null +++ b/packages/gguf/src/utils/promisesQueue.ts @@ -0,0 +1,23 @@ +/** + * Execute queue of promises. + * + * Inspired by github.com/rxaviers/async-pool + */ +export async function promisesQueue(factories: (() => Promise)[], concurrency: number): Promise { + const results: T[] = []; + const executing: Set> = new Set(); + let index = 0; + for (const factory of factories) { + const closureIndex = index++; + const e = factory().then((r) => { + results[closureIndex] = r; + executing.delete(e); + }); + executing.add(e); + if (executing.size >= concurrency) { + await Promise.race(executing); + } + } + await Promise.all(executing); + return results; +} diff --git a/packages/hub/package.json b/packages/hub/package.json index b7c8bbd86..baa0a8323 100644 --- a/packages/hub/package.json +++ b/packages/hub/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/hub", "packageManager": "pnpm@8.10.5", - "version": "0.14.6", + "version": "0.14.10", "description": "Utilities to interact with the Hugging Face hub", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { diff --git a/packages/hub/src/lib/file-download-info.ts b/packages/hub/src/lib/file-download-info.ts index 72630728e..f5daaca8b 100644 --- a/packages/hub/src/lib/file-download-info.ts +++ b/packages/hub/src/lib/file-download-info.ts @@ -48,12 +48,12 @@ export async function fileDownloadInfo(params: { const resp = await (params.fetch ?? fetch)(url, { method: "GET", - headers: params.credentials - ? 
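A small sketch of how the new `promisesQueue` helper (added above in `packages/gguf/src/utils/promisesQueue.ts`) bounds concurrency; the shard URLs below are hypothetical placeholders:

```ts
import { promisesQueue } from "./utils/promisesQueue";

// Hypothetical shard URLs; each factory starts its request lazily,
// so at most `concurrency` downloads are in flight at once.
const shardUrls = [
  "model-00001-of-00003.gguf",
  "model-00002-of-00003.gguf",
  "model-00003-of-00003.gguf",
];

const buffers = await promisesQueue(
  shardUrls.map((url) => () => fetch(url).then((res) => res.arrayBuffer())),
  2 // concurrency limit; ggufAllShards above uses 20
);
// `buffers` preserves the order of `shardUrls`, not completion order.
```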
{ - Authorization: `Bearer ${params.credentials.accessToken}`, - Range: "bytes=0-0", - } - : {}, + headers: { + ...(params.credentials && { + Authorization: `Bearer ${params.credentials.accessToken}`, + }), + Range: "bytes=0-0", + }, }); if (resp.status === 404 && resp.headers.get("X-Error-Code") === "EntryNotFound") { @@ -70,13 +70,14 @@ export async function fileDownloadInfo(params: { throw new InvalidApiResponseFormatError("Expected ETag"); } - const sizeHeader = resp.headers.get("Content-Length"); + const contentRangeHeader = resp.headers.get("content-range"); - if (!sizeHeader) { + if (!contentRangeHeader) { throw new InvalidApiResponseFormatError("Expected size information"); } - const size = parseInt(sizeHeader); + const [, parsedSize] = contentRangeHeader.split("/"); + const size = parseInt(parsedSize); if (isNaN(size)) { throw new InvalidApiResponseFormatError("Invalid file size received"); diff --git a/packages/hub/src/lib/list-models.spec.ts b/packages/hub/src/lib/list-models.spec.ts index 964cf8e85..e3da993c6 100644 --- a/packages/hub/src/lib/list-models.spec.ts +++ b/packages/hub/src/lib/list-models.spec.ts @@ -75,7 +75,7 @@ describe("listModels", () => { limit: 10, })) { count++; - expect(entry.name).to.include("t5"); + expect(entry.name.toLocaleLowerCase()).to.include("t5"); } expect(count).to.equal(10); diff --git a/packages/hub/src/lib/oauth-handle-redirect.ts b/packages/hub/src/lib/oauth-handle-redirect.ts index 786ef9f2b..771b6d439 100644 --- a/packages/hub/src/lib/oauth-handle-redirect.ts +++ b/packages/hub/src/lib/oauth-handle-redirect.ts @@ -13,9 +13,14 @@ export interface OAuthResult { avatarUrl: string; websiteUrl?: string; isPro: boolean; + canPay?: boolean; orgs: Array<{ + id: string; name: string; isEnterprise: boolean; + canPay?: boolean; + avatarUrl: string; + roleInOrg?: string; }>; }; /** @@ -151,9 +156,14 @@ export async function oauthHandleRedirect(opts?: { hubUrl?: string }): Promise; } = await userInfoRes.json(); @@ -169,7 +179,16 @@ export async function oauthHandleRedirect(opts?: { hubUrl?: string }): Promise ({ + id: org.sub, + name: org.name, + fullname: org.name, + isEnterprise: org.isEnterprise, + canPay: org.canPay, + avatarUrl: org.picture, + roleInOrg: org.roleInOrg, + })) ?? 
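The `fileDownloadInfo` change above switches the size lookup from `Content-Length` to `Content-Range`. A sketch of the reasoning with a hypothetical URL: because the request now always sends `Range: "bytes=0-0"`, the body is a single byte and `Content-Length` reflects that partial body, while the total file size only appears after the slash in `Content-Range`:

```ts
const resp = await fetch("https://example.com/some-file.bin", {
  headers: { Range: "bytes=0-0" },
});

resp.headers.get("Content-Length"); // "1" — size of the 1-byte partial body
const contentRange = resp.headers.get("content-range"); // e.g. "bytes 0-0/134217728"

// Total size is the part after the slash, as parsed by the new code above
const size = parseInt(contentRange?.split("/")[1] ?? "");
console.log(size); // 134217728 in this hypothetical example
```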
[], }, state: parsedState.state, scope: token.scope, diff --git a/packages/hub/src/lib/oauth-login-url.ts b/packages/hub/src/lib/oauth-login-url.ts index 4762049e9..afb601d21 100644 --- a/packages/hub/src/lib/oauth-login-url.ts +++ b/packages/hub/src/lib/oauth-login-url.ts @@ -122,7 +122,7 @@ export async function oauthLoginUrl(opts?: { return `${opendidConfig.authorization_endpoint}?${new URLSearchParams({ client_id: clientId, - scope: opts?.scopes || "openid profile", + scope: opts?.scopes || variables?.OAUTH_SCOPES || "openid profile", response_type: "code", redirect_uri: redirectUri, state, diff --git a/packages/hub/src/lib/parse-safetensors-metadata.spec.ts b/packages/hub/src/lib/parse-safetensors-metadata.spec.ts index e310076d4..71077e3bb 100644 --- a/packages/hub/src/lib/parse-safetensors-metadata.spec.ts +++ b/packages/hub/src/lib/parse-safetensors-metadata.spec.ts @@ -1,5 +1,5 @@ import { assert, it, describe } from "vitest"; -import { parseSafetensorsMetadata } from "./parse-safetensors-metadata"; +import { parseSafetensorsMetadata, parseSafetensorsShardFilename } from "./parse-safetensors-metadata"; import { sum } from "../utils/sum"; describe("parseSafetensorsMetadata", () => { @@ -109,4 +109,14 @@ describe("parseSafetensorsMetadata", () => { assert.deepStrictEqual(parse.parameterCount, { BF16: 8_537_680_896 }); assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 8_537_680_896); }); + + it("should detect sharded safetensors filename", async () => { + const safetensorsFilename = "model_00005-of-00072.safetensors"; // https://huggingface.co/bigscience/bloom/blob/4d8e28c67403974b0f17a4ac5992e4ba0b0dbb6f/model_00005-of-00072.safetensors + const safetensorsShardFileInfo = parseSafetensorsShardFilename(safetensorsFilename); + + assert.strictEqual(safetensorsShardFileInfo?.prefix, "model_"); + assert.strictEqual(safetensorsShardFileInfo?.basePrefix, "model"); + assert.strictEqual(safetensorsShardFileInfo?.shard, "00005"); + assert.strictEqual(safetensorsShardFileInfo?.total, "00072"); + }); }); diff --git a/packages/hub/src/lib/parse-safetensors-metadata.ts b/packages/hub/src/lib/parse-safetensors-metadata.ts index 9d5e61e97..1b2d35ed7 100644 --- a/packages/hub/src/lib/parse-safetensors-metadata.ts +++ b/packages/hub/src/lib/parse-safetensors-metadata.ts @@ -14,8 +14,28 @@ export const SAFETENSORS_INDEX_FILE = "model.safetensors.index.json"; /// but in some situations safetensors weights have different filenames. 
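The OAuth changes above enrich the org entries returned by `oauthHandleRedirect` with `id`, `avatarUrl`, `roleInOrg`, and an optional `canPay` flag. A hedged sketch of reading them, assuming the default hub URL:

```ts
import { oauthHandleRedirect } from "@huggingface/hub";

// Call this on the page the OAuth provider redirects back to.
const oauth = await oauthHandleRedirect();

console.log(oauth.userInfo.isPro, oauth.userInfo.canPay);
for (const org of oauth.userInfo.orgs) {
  // id, avatarUrl and roleInOrg are new in this change; canPay is optional.
  console.log(org.id, org.name, org.roleInOrg ?? "unknown role", org.isEnterprise, org.canPay);
}
```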
export const RE_SAFETENSORS_FILE = /\.safetensors$/; export const RE_SAFETENSORS_INDEX_FILE = /\.safetensors\.index\.json$/; -export const RE_SAFETENSORS_SHARD_FILE = /\d{5}-of-\d{5}\.safetensors$/; -const PARALLEL_DOWNLOADS = 5; +export const RE_SAFETENSORS_SHARD_FILE = + /^(?(?.*?)[_-])(?\d{5})-of-(?\d{5})\.safetensors$/; +export interface SafetensorsShardFileInfo { + prefix: string; + basePrefix: string; + shard: string; + total: string; +} +export function parseSafetensorsShardFilename(filename: string): SafetensorsShardFileInfo | null { + const match = RE_SAFETENSORS_SHARD_FILE.exec(filename); + if (match && match.groups) { + return { + prefix: match.groups["prefix"], + basePrefix: match.groups["basePrefix"], + shard: match.groups["shard"], + total: match.groups["total"], + }; + } + return null; +} + +const PARALLEL_DOWNLOADS = 20; const MAX_HEADER_LENGTH = 25_000_000; class SafetensorParseError extends Error {} @@ -139,7 +159,7 @@ async function parseShardedIndex( throw new SafetensorParseError(`Failed to parse file ${path}: not a valid JSON.`); } - const pathPrefix = path.substr(0, path.lastIndexOf("/") + 1); + const pathPrefix = path.slice(0, path.lastIndexOf("/") + 1); const filenames = [...new Set(Object.values(index.weight_map))]; const shardedMap: SafetensorsShardedHeaders = Object.fromEntries( await promisesQueue( diff --git a/packages/hub/src/types/public.d.ts b/packages/hub/src/types/public.d.ts index da15dab6b..5655d2441 100644 --- a/packages/hub/src/types/public.d.ts +++ b/packages/hub/src/types/public.d.ts @@ -147,6 +147,7 @@ export type License = | "lgpl-lr" | "deepfloyd-if-license" | "llama2" + | "llama3" | "gemma" | "unknown" | "other"; diff --git a/packages/inference/package.json b/packages/inference/package.json index 6fadf94f0..e01f556ee 100644 --- a/packages/inference/package.json +++ b/packages/inference/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/inference", - "version": "2.6.6", + "version": "2.6.7", "packageManager": "pnpm@8.10.5", "license": "MIT", "author": "Tim Mikeladze ", diff --git a/packages/inference/src/lib/makeRequestOptions.ts b/packages/inference/src/lib/makeRequestOptions.ts index 51a813f3a..86e1342e0 100644 --- a/packages/inference/src/lib/makeRequestOptions.ts +++ b/packages/inference/src/lib/makeRequestOptions.ts @@ -96,19 +96,14 @@ export async function makeRequestOptions( return `${HF_INFERENCE_API_BASE_URL}/models/${model}`; })(); - // Let users configure credentials, or disable them all together (or keep default behavior). - // --- - // This used to be an internal property only and never exposed to users. This means that most usages will never define this value - // So in order to make this backwards compatible, if it's undefined we go to "same-origin" (default behaviour before). - // If it's a boolean and set to true then set to "include". If false, don't define credentials at all (useful for edge runtimes) - // Then finally, if it's a string, use it as-is. + /** + * For edge runtimes, leave 'credentials' undefined, otherwise cloudflare workers will error + */ let credentials: RequestCredentials | undefined; if (typeof includeCredentials === "string") { credentials = includeCredentials as RequestCredentials; - } else if (typeof includeCredentials === "boolean") { - credentials = includeCredentials ? 
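A quick sketch of the new `parseSafetensorsShardFilename` helper, using the filename and expected values from the spec test above and importing from the module file, as the spec does:

```ts
import { parseSafetensorsShardFilename } from "./parse-safetensors-metadata";

const info = parseSafetensorsShardFilename("model_00005-of-00072.safetensors");
// => { prefix: "model_", basePrefix: "model", shard: "00005", total: "00072" }

// Non-sharded filenames do not match the new regex and return null
parseSafetensorsShardFilename("model.safetensors"); // null
```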
"include" : undefined; - } else if (includeCredentials === undefined) { - credentials = "same-origin"; + } else if (includeCredentials === true) { + credentials = "include"; } const info: RequestInit = { headers, @@ -119,7 +114,7 @@ export async function makeRequestOptions( ...(otherArgs.model && isUrl(otherArgs.model) ? omit(otherArgs, "model") : otherArgs), ...(otherOptions && !isObjectEmpty(otherOptions) && { options: otherOptions }), }), - credentials, + ...(credentials && { credentials }), signal: options?.signal, }; return { url, info }; diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index e2e110e2b..e762f3cf7 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -32,7 +32,7 @@ export interface Options { signal?: AbortSignal; /** - * (Default: "same-origin"). String | Boolean. Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all. + * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers). */ includeCredentials?: string | boolean; } diff --git a/packages/jinja/test/e2e.test.js b/packages/jinja/test/e2e.test.js index 86da9642c..58e10f35e 100644 --- a/packages/jinja/test/e2e.test.js +++ b/packages/jinja/test/e2e.test.js @@ -192,7 +192,7 @@ const TEST_CUSTOM_TEMPLATES = Object.freeze({ }, target: `<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n`, }, - "mistralai/Mistral-7B-Instruct-v0.1": { + "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ": { chat_template: `{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`, data: { messages: EXAMPLE_CHAT, @@ -440,7 +440,7 @@ describe("End-to-end tests", () => { }); it("should parse a chat template from the Hugging Face Hub", async () => { - const repo = "mistralai/Mistral-7B-Instruct-v0.1"; + const repo = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"; const tokenizerConfig = await ( await downloadFile({ repo, diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 998c1115a..d18df0486 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.6.1", + "version": "0.9.1", "description": "List of ML tasks for huggingface.co/tasks", "repository": "https://github.com/huggingface/huggingface.js.git", "publishConfig": { @@ -27,7 +27,8 @@ "build": "tsup src/index.ts --format cjs,esm --clean --dts && pnpm run inference-codegen", "prepare": "pnpm run build", "check": "tsc", - "inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts" + "inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts", + "inference-tgi-import": "tsx 
scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json" }, "type": "module", "files": [ @@ -44,6 +45,10 @@ "license": "MIT", "devDependencies": { "@types/node": "^20.11.5", - "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz" + "quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz", + "type-fest": "^3.13.1" + }, + "dependencies": { + "node-fetch": "^3.3.2" } } diff --git a/packages/tasks/pnpm-lock.yaml b/packages/tasks/pnpm-lock.yaml index 87ac15302..81a6834ab 100644 --- a/packages/tasks/pnpm-lock.yaml +++ b/packages/tasks/pnpm-lock.yaml @@ -4,6 +4,11 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false +dependencies: + node-fetch: + specifier: ^3.3.2 + version: 3.3.2 + devDependencies: '@types/node': specifier: ^20.11.5 @@ -11,6 +16,9 @@ devDependencies: quicktype-core: specifier: https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz version: '@github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz' + type-fest: + specifier: ^3.13.1 + version: 3.13.1 packages: @@ -62,6 +70,11 @@ packages: - encoding dev: true + /data-uri-to-buffer@4.0.1: + resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} + engines: {node: '>= 12'} + dev: false + /event-target-shim@5.0.1: resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} engines: {node: '>=6'} @@ -72,6 +85,21 @@ packages: engines: {node: '>=0.8.x'} dev: true + /fetch-blob@3.2.0: + resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} + engines: {node: ^12.20 || >= 14.13} + dependencies: + node-domexception: 1.0.0 + web-streams-polyfill: 3.3.3 + dev: false + + /formdata-polyfill@4.0.10: + resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} + engines: {node: '>=12.20.0'} + dependencies: + fetch-blob: 3.2.0 + dev: false + /ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} dev: true @@ -88,6 +116,11 @@ packages: resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} dev: true + /node-domexception@1.0.0: + resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} + engines: {node: '>=10.5.0'} + dev: false + /node-fetch@2.7.0: resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} engines: {node: 4.x || >=6.0.0} @@ -100,6 +133,15 @@ packages: whatwg-url: 5.0.0 dev: true + /node-fetch@3.3.2: + resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + dependencies: + data-uri-to-buffer: 4.0.1 + fetch-blob: 3.2.0 + formdata-polyfill: 4.0.10 + dev: false + /pako@0.2.9: resolution: {integrity: sha512-NUcwaKxUxWrZLpDG+z/xZaCgQITkA/Dv4V/T6bw7VON6l1Xz/VnrBqrYjZQ12TamKHzITTfOEIYUj48y2KXImA==} dev: true @@ -147,6 +189,11 @@ packages: resolution: 
{integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} dev: true + /type-fest@3.13.1: + resolution: {integrity: sha512-tLq3bSNx+xSpwvAJnzrK0Ep5CLNWjvFTOp71URMaAEWBfRb9nnJiBoUe0tF8bI4ZFO3omgBR6NvnbzVUT3Ly4g==} + engines: {node: '>=14.16'} + dev: true + /undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} dev: true @@ -169,6 +216,11 @@ packages: resolution: {integrity: sha512-HXgFDgDommxn5/bIv0cnQZsPhHDA90NPHD6+c/v21U5+Sx5hoP8+dP9IZXBU1gIfvdRfhG8cel9QNPeionfcCQ==} dev: true + /web-streams-polyfill@3.3.3: + resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} + engines: {node: '>= 8'} + dev: false + /webidl-conversions@3.0.1: resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} dev: true diff --git a/packages/tasks/scripts/inference-codegen.ts b/packages/tasks/scripts/inference-codegen.ts index 587c6244b..68d071e57 100644 --- a/packages/tasks/scripts/inference-codegen.ts +++ b/packages/tasks/scripts/inference-codegen.ts @@ -57,7 +57,7 @@ async function buildInputData(taskId: string, taskSpecDir: string, allSpecFiles: if (taskId === "text-generation" || taskId === "chat-completion") { await schema.addSource({ name: `${taskId}-stream-output`, - schema: await fs.readFile(`${taskSpecDir}/output_stream.json`, { encoding: "utf-8" }), + schema: await fs.readFile(`${taskSpecDir}/stream_output.json`, { encoding: "utf-8" }), }); } const inputData = new InputData(); diff --git a/packages/tasks/scripts/inference-tgi-import.ts b/packages/tasks/scripts/inference-tgi-import.ts new file mode 100644 index 000000000..781eb0e30 --- /dev/null +++ b/packages/tasks/scripts/inference-tgi-import.ts @@ -0,0 +1,115 @@ +/* + * Fetches TGI specs and generated JSON schema for input, output and stream_output of + * text-generation and chat-completion tasks. + * See https://huggingface.github.io/text-generation-inference/ + */ +import fs from "fs/promises"; +import fetch from "node-fetch"; +import * as path from "node:path/posix"; +import { existsSync as pathExists } from "node:fs"; +import type { JsonObject, JsonValue } from "type-fest"; + +const URL = "https://huggingface.github.io/text-generation-inference/openapi.json"; + +const rootDirFinder = function (): string { + let currentPath = path.normalize(import.meta.url); + + while (currentPath !== "/") { + if (pathExists(path.join(currentPath, "package.json"))) { + return currentPath; + } + + currentPath = path.normalize(path.join(currentPath, "..")); + } + + return "/"; +}; + +const rootDir = rootDirFinder(); +const tasksDir = path.join(rootDir, "src", "tasks"); + +function toCamelCase(str: string, joiner = "") { + return str + .split(/[-_]/) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(joiner); +} + +async function _extractAndAdapt(task: string, mainComponentName: string, type: "input" | "output" | "stream_output") { + console.debug(`✨ Importing`, task, type); + + console.debug(" 📥 Fetching TGI specs"); + const response = await fetch(URL); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const openapi = (await response.json()) as any; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const components: Record = openapi["components"]["schemas"]; + + // e.g. TextGeneration + const camelName = toCamelCase(task); + // e.g. 
TextGenerationInput + const camelFullName = camelName + toCamelCase(type); + const mainComponent = components[mainComponentName]; + const filteredComponents: Record = {}; + + function _scan(data: JsonValue) { + if (Array.isArray(data) || data instanceof Array) { + for (const item of data) { + _scan(item); + } + } else if (data && typeof data === "object") { + for (const key of Object.keys(data)) { + if (key === "$ref" && typeof data[key] === "string") { + // Verify reference exists + const ref = (data[key] as string).split("/").pop() ?? ""; + if (!components[ref]) { + throw new Error(`Reference not found in components: ${data[key]}`); + } + + // Add reference to components to export (and scan it too) + const newRef = camelFullName + ref.replace(camelName, ""); + if (!filteredComponents[newRef]) { + components[ref]["title"] = newRef; // Rename title to avoid conflicts + filteredComponents[newRef] = components[ref]; + _scan(components[ref]); + } + + // Updating the reference to new format + data[key] = `#/$defs/${newRef}`; + } else { + _scan(data[key]); + } + } + } + } + + console.debug(" 📦 Packaging jsonschema"); + _scan(mainComponent); + + const prettyName = toCamelCase(task, " ") + " " + toCamelCase(type, " "); + const inputSchema = { + $id: `/inference/schemas/${task}/${type}.json`, + $schema: "http://json-schema.org/draft-06/schema#", + description: + prettyName + + ".\n\nAuto-generated from TGI specs." + + "\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", + title: camelFullName, + type: "object", + required: mainComponent["required"], + properties: mainComponent["properties"], + $defs: filteredComponents, + }; + + const specPath = path.join(tasksDir, task, "spec", `${type}.json`); + console.debug(" 📂 Exporting", specPath); + await fs.writeFile(specPath, JSON.stringify(inputSchema, null, 4)); +} + +await _extractAndAdapt("text-generation", "CompatGenerateRequest", "input"); +await _extractAndAdapt("text-generation", "GenerateResponse", "output"); +await _extractAndAdapt("text-generation", "StreamResponse", "stream_output"); +await _extractAndAdapt("chat-completion", "ChatRequest", "input"); +await _extractAndAdapt("chat-completion", "ChatCompletion", "output"); +await _extractAndAdapt("chat-completion", "ChatCompletionChunk", "stream_output"); +console.debug("✅ All done!"); diff --git a/packages/tasks/src/library-to-tasks.ts b/packages/tasks/src/library-to-tasks.ts index c368a2b91..71fd8e600 100644 --- a/packages/tasks/src/library-to-tasks.ts +++ b/packages/tasks/src/library-to-tasks.ts @@ -6,7 +6,7 @@ import type { PipelineType } from "./pipelines"; * Inference API (serverless) should be disabled for all other (library, task) pairs beyond this mapping. * This mapping is partially generated automatically by "python-api-export-tasks" action in * huggingface/api-inference-community repo upon merge. For transformers, the mapping is manually - * based on api-inference. + * based on api-inference (hf_types.rs). */ export const LIBRARY_TASK_MAPPING: Partial> = { "adapter-transformers": ["question-answering", "text-classification", "token-classification"], @@ -49,15 +49,24 @@ export const LIBRARY_TASK_MAPPING: Partial [ + `# Available backend options are: "jax", "tensorflow", "torch". 
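To make the renaming performed by `inference-tgi-import.ts` concrete, here is a self-contained sketch; the `toCamelCase` helper is copied from the script above, and the component name `Message` is only an illustrative example of a TGI schema referenced from `ChatRequest`:

```ts
// Copied from the import script for illustration
function toCamelCase(str: string, joiner = "") {
  return str
    .split(/[-_]/)
    .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
    .join(joiner);
}

const task = "chat-completion";
const type = "input";

const camelName = toCamelCase(task); // "ChatCompletion"
const camelFullName = camelName + toCamelCase(type); // "ChatCompletionInput"

// A referenced TGI component named (for example) "Message" is renamed and re-referenced as:
const newRef = camelFullName + "Message".replace(camelName, ""); // "ChatCompletionInputMessage"
console.log(newRef); // its $ref is rewritten to "#/$defs/ChatCompletionInputMessage"
```

The script itself is wired up as the `inference-tgi-import` npm script added to `packages/tasks/package.json` above.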
+os.environ["KERAS_BACKEND"] = "tensorflow" + +import keras_nlp + +tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}") +backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}") +`, +]; + export const open_clip = (model: ModelData): string[] => [ `import open_clip @@ -531,6 +542,12 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model); `, ]; +export const voicecraft = (model: ModelData): string[] => [ + `from voicecraft import VoiceCraft + +model = VoiceCraft.from_pretrained("${model.id}")`, +]; + export const mlx = (model: ModelData): string[] => [ `pip install huggingface_hub hf_transfer diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 9f5f9b55a..756b2dde9 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -116,6 +116,15 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "doctr", repoUrl: "https://github.com/mindee/doctr", }, + elm: { + prettyLabel: "ELM", + repoName: "elm", + repoUrl: "https://github.com/slicex-ai/elm", + filter: false, + countDownloads: { + wildcard: { path: "*/slicex_elm_config.json" }, + }, + }, espnet: { prettyLabel: "ESPnet", repoName: "ESPnet", @@ -185,6 +194,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { filter: true, countDownloads: { term: { path: "saved_model.pb" } }, }, + "keras-nlp": { + prettyLabel: "KerasNLP", + repoName: "KerasNLP", + repoUrl: "https://keras.io/keras_nlp/", + docsUrl: "https://github.com/keras-team/keras-nlp", + snippets: snippets.keras_nlp, + }, k2: { prettyLabel: "K2", repoName: "k2", @@ -401,6 +417,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { wildcard: { path: "*.sentis" }, }, }, + voicecraft: { + prettyLabel: "VoiceCraft", + repoName: "VoiceCraft", + repoUrl: "https://github.com/jasonppy/VoiceCraft", + docsUrl: "https://github.com/jasonppy/VoiceCraft", + snippets: snippets.voicecraft, + }, whisperkit: { prettyLabel: "WhisperKit", repoName: "WhisperKit", diff --git a/packages/tasks/src/tasks/chat-completion/inference.ts b/packages/tasks/src/tasks/chat-completion/inference.ts index 07784d96c..488a1e87e 100644 --- a/packages/tasks/src/tasks/chat-completion/inference.ts +++ b/packages/tasks/src/tasks/chat-completion/inference.ts @@ -5,154 +5,273 @@ */ /** - * Inputs for ChatCompletion inference + * Chat Completion Input. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. */ export interface ChatCompletionInput { /** * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing - * frequency in the text so far, decreasing the model's likelihood to repeat the same line - * verbatim. + * frequency in the text so far, + * decreasing the model's likelihood to repeat the same line verbatim. */ frequency_penalty?: number; + /** + * UNUSED + * Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON + * object that maps tokens + * (specified by their token ID in the tokenizer) to an associated bias value from -100 to + * 100. Mathematically, + * the bias is added to the logits generated by the model prior to sampling. The exact + * effect will vary per model, + * but values between -1 and 1 should decrease or increase likelihood of selection; values + * like -100 or 100 should + * result in a ban or exclusive selection of the relevant token. 
+ */ + logit_bias?: number[]; + /** + * Whether to return log probabilities of the output tokens or not. If true, returns the log + * probabilities of each + * output token returned in the content of message. + */ + logprobs?: boolean; /** * The maximum number of tokens that can be generated in the chat completion. */ max_tokens?: number; + /** + * A list of messages comprising the conversation so far. + */ messages: ChatCompletionInputMessage[]; /** - * The random sampling seed. + * [UNUSED] ID of the model to use. See the model endpoint compatibility table for details + * on which models work with the Chat API. */ - seed?: number; + model: string; + /** + * UNUSED + * How many chat completion choices to generate for each input message. Note that you will + * be charged based on the + * number of generated tokens across all of the choices. Keep n as 1 to minimize costs. + */ + n?: number; /** - * Stop generating tokens if a stop token is generated. + * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they + * appear in the text so far, + * increasing the model's likelihood to talk about new topics */ - stop?: ChatCompletionInputStopReason; + presence_penalty?: number; + seed?: number; /** - * If set, partial message deltas will be sent. + * Up to 4 sequences where the API will stop generating further tokens. */ + stop?: string[]; stream?: boolean; /** - * The value used to modulate the logits distribution. + * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the + * output more random, while + * lower values like 0.2 will make it more focused and deterministic. + * + * We generally recommend altering this or `top_p` but not both. */ temperature?: number; + tool_choice?: ChatCompletionInputToolType; + /** + * A prompt to be appended before the tools + */ + tool_prompt?: string; /** - * If set to < 1, only the smallest set of most probable tokens with probabilities that add - * up to `top_p` or higher are kept for generation. + * A list of tools the model may call. Currently, only functions are supported as a tool. + * Use this to provide a list of + * functions the model may generate JSON inputs for. + */ + tools?: ChatCompletionInputTool[]; + /** + * An integer between 0 and 5 specifying the number of most likely tokens to return at each + * token position, each with + * an associated log probability. logprobs must be set to true if this parameter is used. + */ + top_logprobs?: number; + /** + * An alternative to sampling with temperature, called nucleus sampling, where the model + * considers the results of the + * tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% + * probability mass are considered. */ top_p?: number; [property: string]: unknown; } export interface ChatCompletionInputMessage { - /** - * The content of the message. - */ - content: string; - role: ChatCompletionMessageRole; + content?: string; + name?: string; + role: string; + tool_calls?: ChatCompletionInputToolCall[]; [property: string]: unknown; } -/** - * The role of the message author. - */ -export type ChatCompletionMessageRole = "assistant" | "system" | "user"; +export interface ChatCompletionInputToolCall { + function: ChatCompletionInputFunctionDefinition; + id: number; + type: string; + [property: string]: unknown; +} -/** - * Stop generating tokens if a stop token is generated. 
- */ -export type ChatCompletionInputStopReason = string[] | string; +export interface ChatCompletionInputFunctionDefinition { + arguments: unknown; + description?: string; + name: string; + [property: string]: unknown; +} + +export type ChatCompletionInputToolType = "OneOf" | ChatCompletionInputToolTypeObject; + +export interface ChatCompletionInputToolTypeObject { + FunctionName: string; + [property: string]: unknown; +} + +export interface ChatCompletionInputTool { + function: ChatCompletionInputFunctionDefinition; + type: string; + [property: string]: unknown; +} /** - * Outputs for Chat Completion inference + * Chat Completion Output. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. */ export interface ChatCompletionOutput { - /** - * A list of chat completion choices. - */ - choices: ChatCompletionOutputChoice[]; - /** - * The Unix timestamp (in seconds) of when the chat completion was created. - */ + choices: ChatCompletionOutputComplete[]; created: number; + id: string; + model: string; + object: string; + system_fingerprint: string; + usage: ChatCompletionOutputUsage; [property: string]: unknown; } -export interface ChatCompletionOutputChoice { - /** - * The reason why the generation was stopped. - */ - finish_reason: ChatCompletionFinishReason; - /** - * The index of the choice in the list of choices. - */ +export interface ChatCompletionOutputComplete { + finish_reason: string; index: number; - message: ChatCompletionOutputChoiceMessage; + logprobs?: ChatCompletionOutputLogprobs; + message: ChatCompletionOutputMessage; [property: string]: unknown; } -/** - * The reason why the generation was stopped. - * - * The generated sequence reached the maximum allowed length - * - * The model generated an end-of-sentence (EOS) token - * - * One of the sequence in stop_sequences was generated - */ -export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence"; +export interface ChatCompletionOutputLogprobs { + content: ChatCompletionOutputLogprob[]; + [property: string]: unknown; +} -export interface ChatCompletionOutputChoiceMessage { - /** - * The content of the chat completion message. - */ - content: string; - role: ChatCompletionMessageRole; +export interface ChatCompletionOutputLogprob { + logprob: number; + token: string; + top_logprobs: ChatCompletionOutputTopLogprob[]; + [property: string]: unknown; +} + +export interface ChatCompletionOutputTopLogprob { + logprob: number; + token: string; + [property: string]: unknown; +} + +export interface ChatCompletionOutputMessage { + content?: string; + name?: string; + role: string; + tool_calls?: ChatCompletionOutputToolCall[]; + [property: string]: unknown; +} + +export interface ChatCompletionOutputToolCall { + function: ChatCompletionOutputFunctionDefinition; + id: number; + type: string; + [property: string]: unknown; +} + +export interface ChatCompletionOutputFunctionDefinition { + arguments: unknown; + description?: string; + name: string; + [property: string]: unknown; +} + +export interface ChatCompletionOutputUsage { + completion_tokens: number; + prompt_tokens: number; + total_tokens: number; [property: string]: unknown; } /** - * Chat Completion Stream Output + * Chat Completion Stream Output. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. 
*/ export interface ChatCompletionStreamOutput { - /** - * A list of chat completion choices. - */ choices: ChatCompletionStreamOutputChoice[]; - /** - * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has - * the same timestamp. - */ created: number; + id: string; + model: string; + object: string; + system_fingerprint: string; [property: string]: unknown; } export interface ChatCompletionStreamOutputChoice { - /** - * A chat completion delta generated by streamed model responses. - */ delta: ChatCompletionStreamOutputDelta; - /** - * The reason why the generation was stopped. - */ - finish_reason?: ChatCompletionFinishReason; - /** - * The index of the choice in the list of choices. - */ + finish_reason?: string; index: number; + logprobs?: ChatCompletionStreamOutputLogprobs; [property: string]: unknown; } -/** - * A chat completion delta generated by streamed model responses. - */ export interface ChatCompletionStreamOutputDelta { - /** - * The contents of the chunk message. - */ content?: string; - /** - * The role of the author of this message. - */ - role?: string; + role: string; + tool_calls?: ChatCompletionStreamOutputDeltaToolCall; + [property: string]: unknown; +} + +export interface ChatCompletionStreamOutputDeltaToolCall { + function: ChatCompletionStreamOutputFunction; + id: string; + index: number; + type: string; + [property: string]: unknown; +} + +export interface ChatCompletionStreamOutputFunction { + arguments: string; + name?: string; + [property: string]: unknown; +} + +export interface ChatCompletionStreamOutputLogprobs { + content: ChatCompletionStreamOutputLogprob[]; + [property: string]: unknown; +} + +export interface ChatCompletionStreamOutputLogprob { + logprob: number; + token: string; + top_logprobs: ChatCompletionStreamOutputTopLogprob[]; + [property: string]: unknown; +} + +export interface ChatCompletionStreamOutputTopLogprob { + logprob: number; + token: string; [property: string]: unknown; } diff --git a/packages/tasks/src/tasks/chat-completion/spec/input.json b/packages/tasks/src/tasks/chat-completion/spec/input.json index 8af4e8eec..0b549cd58 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/input.json +++ b/packages/tasks/src/tasks/chat-completion/spec/input.json @@ -1,63 +1,227 @@ { - "title": "ChatCompletionInput", "$id": "/inference/schemas/chat-completion/input.json", "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Inputs for ChatCompletion inference", + "description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", + "title": "ChatCompletionInput", "type": "object", + "required": ["model", "messages"], "properties": { - "messages": { + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.", + "example": "1.0", + "nullable": true + }, + "logit_bias": { "type": "array", - "title": "ChatCompletionInputMessage", "items": { - "type": "object", - "properties": { - "role": { - "$ref": "#/definitions/Role" - }, - "content": { - "type": "string", - "description": "The content of the message." 
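The regenerated types above reshape `ChatCompletionInput` to mirror the TGI schema (required `model` and `messages`, OpenAI-style optional fields) and replace the old stream types. A minimal sketch using the example values from the JSON spec, assuming these types are re-exported from `@huggingface/tasks`:

```ts
import type { ChatCompletionInput, ChatCompletionStreamOutput } from "@huggingface/tasks";

// Only `model` and `messages` are required by the regenerated schema.
const request: ChatCompletionInput = {
  model: "mistralai/Mistral-7B-Instruct-v0.2",
  messages: [{ role: "user", content: "What is Deep Learning?" }],
  max_tokens: 32,
  stream: true,
};

// Sketch: accumulate streamed deltas into the final assistant message content.
function collectContent(chunks: ChatCompletionStreamOutput[]): string {
  return chunks.map((chunk) => chunk.choices[0]?.delta.content ?? "").join("");
}
```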
- } - }, - "required": ["role", "content"] - } + "type": "number", + "format": "float" + }, + "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.", + "nullable": true }, - "frequency_penalty": { - "type": "number", - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim." + "logprobs": { + "type": "boolean", + "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.", + "example": "false", + "nullable": true }, "max_tokens": { "type": "integer", - "description": "The maximum number of tokens that can be generated in the chat completion." + "format": "int32", + "description": "The maximum number of tokens that can be generated in the chat completion.", + "example": "32", + "nullable": true, + "minimum": 0 + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionInputMessage" + }, + "description": "A list of messages comprising the conversation so far.", + "example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]" + }, + "model": { + "type": "string", + "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "n": { + "type": "integer", + "format": "int32", + "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.", + "example": "2", + "nullable": true, + "minimum": 0 + }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics", + "example": 0.1, + "nullable": true }, "seed": { "type": "integer", - "description": "The random sampling seed." + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 }, "stop": { - "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }], - "title": "ChatCompletionInputStopReason", - "description": "Stop generating tokens if a stop token is generated." + "type": "array", + "items": { + "type": "string" + }, + "description": "Up to 4 sequences where the API will stop generating further tokens.", + "example": "null", + "nullable": true }, "stream": { - "type": "boolean", - "description": "If set, partial message deltas will be sent." + "type": "boolean" }, "temperature": { "type": "number", - "description": "The value used to modulate the logits distribution." + "format": "float", + "description": "What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.", + "example": 1, + "nullable": true + }, + "tool_choice": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionInputToolType" + } + ], + "nullable": true + }, + "tool_prompt": { + "type": "string", + "description": "A prompt to be appended before the tools", + "example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"", + "nullable": true + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionInputTool" + }, + "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.", + "example": "null", + "nullable": true + }, + "top_logprobs": { + "type": "integer", + "format": "int32", + "description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.", + "example": "5", + "nullable": true, + "minimum": 0 }, "top_p": { "type": "number", - "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation." + "format": "float", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "example": 0.95, + "nullable": true } }, - "required": ["messages"], - "definitions": { - "Role": { - "oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }], - "title": "ChatCompletionMessageRole", - "description": "The role of the message author." 
+ "$defs": { + "ChatCompletionInputMessage": { + "type": "object", + "required": ["role"], + "properties": { + "content": { + "type": "string", + "example": "My name is David and I", + "nullable": true + }, + "name": { + "type": "string", + "example": "\"David\"", + "nullable": true + }, + "role": { + "type": "string", + "example": "user" + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionInputToolCall" + }, + "nullable": true + } + }, + "title": "ChatCompletionInputMessage" + }, + "ChatCompletionInputToolCall": { + "type": "object", + "required": ["id", "type", "function"], + "properties": { + "function": { + "$ref": "#/$defs/ChatCompletionInputFunctionDefinition" + }, + "id": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "type": { + "type": "string" + } + }, + "title": "ChatCompletionInputToolCall" + }, + "ChatCompletionInputFunctionDefinition": { + "type": "object", + "required": ["name", "arguments"], + "properties": { + "arguments": {}, + "description": { + "type": "string", + "nullable": true + }, + "name": { + "type": "string" + } + }, + "title": "ChatCompletionInputFunctionDefinition" + }, + "ChatCompletionInputToolType": { + "oneOf": [ + { + "type": "object", + "required": ["FunctionName"], + "properties": { + "FunctionName": { + "type": "string" + } + } + }, + { + "type": "string", + "enum": ["OneOf"] + } + ], + "title": "ChatCompletionInputToolType" + }, + "ChatCompletionInputTool": { + "type": "object", + "required": ["type", "function"], + "properties": { + "function": { + "$ref": "#/$defs/ChatCompletionInputFunctionDefinition" + }, + "type": { + "type": "string", + "example": "function" + } + }, + "title": "ChatCompletionInputTool" } } } diff --git a/packages/tasks/src/tasks/chat-completion/spec/output.json b/packages/tasks/src/tasks/chat-completion/spec/output.json index df353d69c..5b602ccd6 100644 --- a/packages/tasks/src/tasks/chat-completion/spec/output.json +++ b/packages/tasks/src/tasks/chat-completion/spec/output.json @@ -1,58 +1,196 @@ { "$id": "/inference/schemas/chat-completion/output.json", "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Outputs for Chat Completion inference", + "description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "ChatCompletionOutput", "type": "object", + "required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"], "properties": { "choices": { "type": "array", - "description": "A list of chat completion choices.", - "title": "ChatCompletionOutputChoice", "items": { - "type": "object", - "properties": { - "finish_reason": { - "$ref": "#/definitions/FinishReason", - "description": "The reason why the generation was stopped." - }, - "index": { - "type": "integer", - "description": "The index of the choice in the list of choices." - }, - "message": { - "type": "object", - "properties": { - "role": { - "$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role" - }, - "content": { - "type": "string", - "description": "The content of the chat completion message." 
- } - }, - "title": "ChatCompletionOutputChoiceMessage", - "required": ["content", "role"] - } - }, - "required": ["finish_reason", "index", "message"] + "$ref": "#/$defs/ChatCompletionOutputComplete" } }, "created": { "type": "integer", - "description": "The Unix timestamp (in seconds) of when the chat completion was created." + "format": "int64", + "example": "1706270835", + "minimum": 0 + }, + "id": { + "type": "string" + }, + "model": { + "type": "string", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "object": { + "type": "string" + }, + "system_fingerprint": { + "type": "string" + }, + "usage": { + "$ref": "#/$defs/ChatCompletionOutputUsage" } }, - "required": ["choices", "created"], - "definitions": { - "FinishReason": { - "type": "string", - "title": "ChatCompletionFinishReason", - "oneOf": [ - { "const": "length", "description": "The generated sequence reached the maximum allowed length" }, - { "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" }, - { "const": "stop_sequence", "description": "One of the sequence in stop_sequences was generated" } - ] + "$defs": { + "ChatCompletionOutputComplete": { + "type": "object", + "required": ["index", "message", "finish_reason"], + "properties": { + "finish_reason": { + "type": "string" + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "logprobs": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionOutputLogprobs" + } + ], + "nullable": true + }, + "message": { + "$ref": "#/$defs/ChatCompletionOutputMessage" + } + }, + "title": "ChatCompletionOutputComplete" + }, + "ChatCompletionOutputLogprobs": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionOutputLogprob" + } + } + }, + "title": "ChatCompletionOutputLogprobs" + }, + "ChatCompletionOutputLogprob": { + "type": "object", + "required": ["token", "logprob", "top_logprobs"], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + }, + "top_logprobs": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionOutputTopLogprob" + } + } + }, + "title": "ChatCompletionOutputLogprob" + }, + "ChatCompletionOutputTopLogprob": { + "type": "object", + "required": ["token", "logprob"], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + } + }, + "title": "ChatCompletionOutputTopLogprob" + }, + "ChatCompletionOutputMessage": { + "type": "object", + "required": ["role"], + "properties": { + "content": { + "type": "string", + "example": "My name is David and I", + "nullable": true + }, + "name": { + "type": "string", + "example": "\"David\"", + "nullable": true + }, + "role": { + "type": "string", + "example": "user" + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionOutputToolCall" + }, + "nullable": true + } + }, + "title": "ChatCompletionOutputMessage" + }, + "ChatCompletionOutputToolCall": { + "type": "object", + "required": ["id", "type", "function"], + "properties": { + "function": { + "$ref": "#/$defs/ChatCompletionOutputFunctionDefinition" + }, + "id": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "type": { + "type": "string" + } + }, + "title": "ChatCompletionOutputToolCall" + }, + "ChatCompletionOutputFunctionDefinition": { + "type": "object", + "required": ["name", "arguments"], + "properties": { + "arguments": {}, + "description": { + 
"type": "string", + "nullable": true + }, + "name": { + "type": "string" + } + }, + "title": "ChatCompletionOutputFunctionDefinition" + }, + "ChatCompletionOutputUsage": { + "type": "object", + "required": ["prompt_tokens", "completion_tokens", "total_tokens"], + "properties": { + "completion_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "prompt_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + } + }, + "title": "ChatCompletionOutputUsage" } } } diff --git a/packages/tasks/src/tasks/chat-completion/spec/output_stream.json b/packages/tasks/src/tasks/chat-completion/spec/output_stream.json deleted file mode 100644 index 727fbe100..000000000 --- a/packages/tasks/src/tasks/chat-completion/spec/output_stream.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "$id": "/inference/schemas/chat-completion/output_stream.json", - "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Chat Completion Stream Output", - "title": "ChatCompletionStreamOutput", - "type": "object", - "properties": { - "choices": { - "type": "array", - "title": "ChatCompletionStreamOutputChoice", - "description": "A list of chat completion choices.", - "items": { - "type": "object", - "properties": { - "delta": { - "type": "object", - "title": "ChatCompletionStreamOutputDelta", - "description": "A chat completion delta generated by streamed model responses.", - "properties": { - "content": { - "type": "string", - "description": "The contents of the chunk message." - }, - "role": { - "type": "string", - "description": "The role of the author of this message." - } - } - }, - "finish_reason": { - "$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason", - "description": "The reason why the generation was stopped." - }, - "index": { - "type": "integer", - "description": "The index of the choice in the list of choices." - } - }, - "required": ["delta", "index"] - } - }, - "created": { - "type": "integer", - "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp." 
- } - }, - "required": ["choices", "created"] -} diff --git a/packages/tasks/src/tasks/chat-completion/spec/stream_output.json b/packages/tasks/src/tasks/chat-completion/spec/stream_output.json new file mode 100644 index 000000000..72575d913 --- /dev/null +++ b/packages/tasks/src/tasks/chat-completion/spec/stream_output.json @@ -0,0 +1,170 @@ +{ + "$id": "/inference/schemas/chat-completion/stream_output.json", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", + "title": "ChatCompletionStreamOutput", + "type": "object", + "required": ["id", "object", "created", "model", "system_fingerprint", "choices"], + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputChoice" + } + }, + "created": { + "type": "integer", + "format": "int64", + "example": "1706270978", + "minimum": 0 + }, + "id": { + "type": "string" + }, + "model": { + "type": "string", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "object": { + "type": "string" + }, + "system_fingerprint": { + "type": "string" + } + }, + "$defs": { + "ChatCompletionStreamOutputChoice": { + "type": "object", + "required": ["index", "delta"], + "properties": { + "delta": { + "$ref": "#/$defs/ChatCompletionStreamOutputDelta" + }, + "finish_reason": { + "type": "string", + "nullable": true + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "logprobs": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionStreamOutputLogprobs" + } + ], + "nullable": true + } + }, + "title": "ChatCompletionStreamOutputChoice" + }, + "ChatCompletionStreamOutputDelta": { + "type": "object", + "required": ["role"], + "properties": { + "content": { + "type": "string", + "example": "What is Deep Learning?", + "nullable": true + }, + "role": { + "type": "string", + "example": "user" + }, + "tool_calls": { + "allOf": [ + { + "$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall" + } + ], + "nullable": true + } + }, + "title": "ChatCompletionStreamOutputDelta" + }, + "ChatCompletionStreamOutputDeltaToolCall": { + "type": "object", + "required": ["index", "id", "type", "function"], + "properties": { + "function": { + "$ref": "#/$defs/ChatCompletionStreamOutputFunction" + }, + "id": { + "type": "string" + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "type": { + "type": "string" + } + }, + "title": "ChatCompletionStreamOutputDeltaToolCall" + }, + "ChatCompletionStreamOutputFunction": { + "type": "object", + "required": ["arguments"], + "properties": { + "arguments": { + "type": "string" + }, + "name": { + "type": "string", + "nullable": true + } + }, + "title": "ChatCompletionStreamOutputFunction" + }, + "ChatCompletionStreamOutputLogprobs": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputLogprob" + } + } + }, + "title": "ChatCompletionStreamOutputLogprobs" + }, + "ChatCompletionStreamOutputLogprob": { + "type": "object", + "required": ["token", "logprob", "top_logprobs"], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + }, + "top_logprobs": { + "type": "array", + "items": { + "$ref": "#/$defs/ChatCompletionStreamOutputTopLogprob" + } + } + }, + "title": 
"ChatCompletionStreamOutputLogprob" + }, + "ChatCompletionStreamOutputTopLogprob": { + "type": "object", + "required": ["token", "logprob"], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + } + }, + "title": "ChatCompletionStreamOutputTopLogprob" + } + } +} diff --git a/packages/tasks/src/tasks/feature-extraction/about.md b/packages/tasks/src/tasks/feature-extraction/about.md index 60c7c7ed3..6ead09eeb 100644 --- a/packages/tasks/src/tasks/feature-extraction/about.md +++ b/packages/tasks/src/tasks/feature-extraction/about.md @@ -1,20 +1,13 @@ -## About the Task - -Feature extraction is the task of building features intended to be informative from a given dataset, -facilitating the subsequent learning and generalization steps in various domains of machine learning. - ## Use Cases -Feature extraction can be used to do transfer learning in natural language processing, computer vision and audio models. +Models trained on a specific dataset can learn features about the data. For instance, a model trained on an English poetry dataset learns English grammar at a very high level. This information can be transferred to a new model that is going to be trained on tweets. This process of extracting features and transferring to another model is called transfer learning. One can pass their dataset through a feature extraction pipeline and feed the result to a classifier. ## Inference -#### Feature Extraction - ```python from transformers import pipeline checkpoint = "facebook/bart-base" -feature_extractor = pipeline("feature-extraction",framework="pt",model=checkpoint) +feature_extractor = pipeline("feature-extraction", framework="pt", model=checkpoint) text = "Transformers is an awesome library!" #Reducing along the first dimension to get a 768 dimensional array diff --git a/packages/tasks/src/tasks/feature-extraction/data.ts b/packages/tasks/src/tasks/feature-extraction/data.ts index c06e942d8..4d620fec2 100644 --- a/packages/tasks/src/tasks/feature-extraction/data.ts +++ b/packages/tasks/src/tasks/feature-extraction/data.ts @@ -41,8 +41,7 @@ const taskData: TaskDataCustom = { }, ], spaces: [], - summary: - "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.", + summary: "Feature extraction is the task of extracting features learnt in a model.", widgetModels: ["facebook/bart-base"], }; diff --git a/packages/tasks/src/tasks/image-feature-extraction/about.md b/packages/tasks/src/tasks/image-feature-extraction/about.md new file mode 100644 index 000000000..9a968b106 --- /dev/null +++ b/packages/tasks/src/tasks/image-feature-extraction/about.md @@ -0,0 +1,23 @@ +## Use Cases + +### Transfer Learning + +Models trained on a specific dataset can learn features about the data. For instance, a model trained on a car classification dataset learns to recognize edges and curves on a very high level and car-specific features on a low level. This information can be transferred to a new model that is going to be trained on classifying trucks. This process of extracting features and transferring to another model is called transfer learning. + +### Similarity + +Features extracted from models contain semantically meaningful information about the world. These features can be used to detect the similarity between two images. Assume there are two images: a photo of a stray cat in a street setting and a photo of a cat at home. 
These images both contain cats, and the features will contain the information that there's a cat in the image. Thus, comparing the features of a stray cat photo to the features of a domestic cat photo will result in higher similarity compared to any other image that doesn't contain any cats.
+
+## Inference
+
+```python
+import torch
+from transformers import pipeline
+
+pipe = pipeline(task="image-feature-extraction", model="google/vit-base-patch16-384", framework="pt", pool=True)
+pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png")
+
+'[[[0.21236686408519745, 1.0919708013534546, 0.8512550592422485, ...]]]'
+```
diff --git a/packages/tasks/src/tasks/image-feature-extraction/data.ts b/packages/tasks/src/tasks/image-feature-extraction/data.ts new file mode 100644 index 000000000..8f207ccf1 --- /dev/null +++ b/packages/tasks/src/tasks/image-feature-extraction/data.ts @@ -0,0 +1,51 @@ +import type { TaskDataCustom } from ".."; + +const taskData: TaskDataCustom = { + datasets: [ + { + description: + "ImageNet-1K is an image classification dataset in which images are used to train image-feature-extraction models.", + id: "imagenet-1k", + }, + ], + demo: { + inputs: [ + { + filename: "mask-generation-input.png", + type: "img", + }, + ], + outputs: [ + { + table: [ + ["Dimension 1", "Dimension 2", "Dimension 3"], + ["0.21236686408519745", "1.0919708013534546", "0.8512550592422485"], + ["0.809657871723175", "-0.18544459342956543", "-0.7851548194885254"], + ["1.3103108406066895", "-0.2479034662246704", "-0.9107287526130676"], + ["1.8536205291748047", "-0.36419737339019775", "0.09717650711536407"], + ], + type: "tabular", + }, + ], + }, + metrics: [], + models: [ + { + description: "A powerful image feature extraction model.", + id: "timm/vit_large_patch14_dinov2.lvd142m", + }, + { + description: "A strong image feature extraction model.", + id: "google/vit-base-patch16-224-in21k", + }, + { + description: "A robust image feature extraction model.", + id: "facebook/dino-vitb16", + }, + ], + spaces: [], + summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.", + widgetModels: [], +}; + +export default taskData;
diff --git a/packages/tasks/src/tasks/index.ts b/packages/tasks/src/tasks/index.ts index f0a046e53..e3f60b89f 100644 --- a/packages/tasks/src/tasks/index.ts +++ b/packages/tasks/src/tasks/index.ts @@ -8,6 +8,7 @@ import documentQuestionAnswering from "./document-question-answering/data"; import featureExtraction from "./feature-extraction/data"; import fillMask from "./fill-mask/data"; import imageClassification from "./image-classification/data"; +import imageFeatureExtraction from "./image-feature-extraction/data"; import imageToImage from "./image-to-image/data"; import imageToText from "./image-to-text/data"; import imageSegmentation from "./image-segmentation/data"; @@ -42,9 +43,8 @@ export type { ChatCompletionInput, ChatCompletionInputMessage, ChatCompletionOutput, - ChatCompletionOutputChoice, - ChatCompletionFinishReason, - ChatCompletionOutputChoiceMessage, + ChatCompletionOutputComplete, + ChatCompletionOutputMessage, ChatCompletionStreamOutput, ChatCompletionStreamOutputChoice, ChatCompletionStreamOutputDelta, @@ -84,14 +84,16 @@ export type { TextClassificationParameters, } from "./text-classification/inference"; export type { - TextGenerationFinishReason, - TextGenerationPrefillToken, +
TextGenerationOutputFinishReason, + TextGenerationOutputPrefillToken, TextGenerationInput, TextGenerationOutput, TextGenerationOutputDetails, - TextGenerationParameters, - TextGenerationOutputSequenceDetails, + TextGenerationInputGenerateParameters, + TextGenerationOutputBestOfSequence, TextGenerationOutputToken, + TextGenerationStreamOutputStreamDetails, + TextGenerationStreamOutput, } from "./text-generation/inference"; export type * from "./video-classification/inference"; export type * from "./visual-question-answering/inference"; @@ -198,6 +200,7 @@ export const TASKS_DATA: Record = { "fill-mask": getData("fill-mask", fillMask), "graph-ml": undefined, "image-classification": getData("image-classification", imageClassification), + "image-feature-extraction": getData("image-feature-extraction", imageFeatureExtraction), "image-segmentation": getData("image-segmentation", imageSegmentation), "image-text-to-text": undefined, "image-to-image": getData("image-to-image", imageToImage), @@ -237,7 +240,6 @@ export const TASKS_DATA: Record = { "zero-shot-object-detection": getData("zero-shot-object-detection", zeroShotObjectDetection), "text-to-3d": getData("text-to-3d", placeholder), "image-to-3d": getData("image-to-3d", placeholder), - "image-feature-extraction": getData("image-feature-extraction", placeholder), } as const; export interface ExampleRepo { diff --git a/packages/tasks/src/tasks/text-generation/inference.ts b/packages/tasks/src/tasks/text-generation/inference.ts index 32419fd7e..37395c580 100644 --- a/packages/tasks/src/tasks/text-generation/inference.ts +++ b/packages/tasks/src/tasks/text-generation/inference.ts @@ -5,246 +5,134 @@ */ /** - * Inputs for Text Generation inference + * Text Generation Input. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. */ export interface TextGenerationInput { - /** - * The text to initialize generation with - */ inputs: string; - /** - * Additional inference parameters - */ - parameters?: TextGenerationParameters; - /** - * Whether to stream output tokens - */ + parameters?: TextGenerationInputGenerateParameters; stream?: boolean; [property: string]: unknown; } -/** - * Additional inference parameters - * - * Additional inference parameters for Text Generation - */ -export interface TextGenerationParameters { - /** - * The number of sampling queries to run. Only the best one (in terms of total logprob) will - * be returned. - */ +export interface TextGenerationInputGenerateParameters { best_of?: number; - /** - * Whether or not to output decoder input details - */ decoder_input_details?: boolean; - /** - * Whether or not to output details - */ details?: boolean; - /** - * Whether to use logits sampling instead of greedy decoding when generating new tokens. - */ do_sample?: boolean; - /** - * The maximum number of tokens to generate. - */ + frequency_penalty?: number; + grammar?: TextGenerationInputGrammarType; max_new_tokens?: number; - /** - * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this - * paper](https://hf.co/papers/1909.05858) for more details. - */ repetition_penalty?: number; - /** - * Whether to prepend the prompt to the generated text. - */ return_full_text?: boolean; - /** - * The random sampling seed. - */ seed?: number; - /** - * Stop generating tokens if a member of `stop_sequences` is generated. 
- */ - stop_sequences?: string[]; - /** - * The value used to modulate the logits distribution. - */ + stop?: string[]; temperature?: number; - /** - * The number of highest probability vocabulary tokens to keep for top-k-filtering. - */ top_k?: number; - /** - * If set to < 1, only the smallest set of most probable tokens with probabilities that add - * up to `top_p` or higher are kept for generation. - */ + top_n_tokens?: number; top_p?: number; - /** - * Truncate input tokens to the given size. - */ truncate?: number; - /** - * Typical Decoding mass. See [Typical Decoding for Natural Language - * Generation](https://hf.co/papers/2202.00666) for more information - */ typical_p?: number; + watermark?: boolean; + [property: string]: unknown; +} + +export interface TextGenerationInputGrammarType { + type: Type; /** - * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226) + * A string that represents a [JSON Schema](https://json-schema.org/). + * + * JSON Schema is a declarative language that allows to annotate JSON documents + * with types and descriptions. */ - watermark?: boolean; + value: unknown; [property: string]: unknown; } +export type Type = "json" | "regex"; + /** - * Outputs for Text Generation inference + * Text Generation Output. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. */ export interface TextGenerationOutput { - /** - * When enabled, details about the generation - */ details?: TextGenerationOutputDetails; - /** - * The generated text - */ generated_text: string; [property: string]: unknown; } -/** - * When enabled, details about the generation - */ export interface TextGenerationOutputDetails { - /** - * Details about additional sequences when best_of is provided - */ - best_of_sequences?: TextGenerationOutputSequenceDetails[]; - /** - * The reason why the generation was stopped. - */ - finish_reason: TextGenerationFinishReason; - /** - * The number of generated tokens - */ + best_of_sequences?: TextGenerationOutputBestOfSequence[]; + finish_reason: TextGenerationOutputFinishReason; generated_tokens: number; - prefill: TextGenerationPrefillToken[]; - /** - * The random seed used for generation - */ + prefill: TextGenerationOutputPrefillToken[]; seed?: number; - /** - * The generated tokens and associated details - */ tokens: TextGenerationOutputToken[]; - /** - * Most likely tokens - */ top_tokens?: Array; [property: string]: unknown; } -export interface TextGenerationOutputSequenceDetails { - finish_reason: TextGenerationFinishReason; - /** - * The generated text - */ +export interface TextGenerationOutputBestOfSequence { + finish_reason: TextGenerationOutputFinishReason; generated_text: string; - /** - * The number of generated tokens - */ generated_tokens: number; - prefill: TextGenerationPrefillToken[]; - /** - * The random seed used for generation - */ + prefill: TextGenerationOutputPrefillToken[]; seed?: number; - /** - * The generated tokens and associated details - */ tokens: TextGenerationOutputToken[]; - /** - * Most likely tokens - */ top_tokens?: Array; [property: string]: unknown; } -/** - * The reason why the generation was stopped. 
- * - * length: The generated sequence reached the maximum allowed length - * - * eos_token: The model generated an end-of-sentence (EOS) token - * - * stop_sequence: One of the sequence in stop_sequences was generated - */ -export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence"; +export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence"; -export interface TextGenerationPrefillToken { +export interface TextGenerationOutputPrefillToken { id: number; logprob: number; - /** - * The text associated with that token - */ text: string; [property: string]: unknown; } -/** - * Generated token. - */ export interface TextGenerationOutputToken { id: number; - logprob?: number; - /** - * Whether or not that token is a special one - */ + logprob: number; special: boolean; - /** - * The text associated with that token - */ text: string; [property: string]: unknown; } /** - * Text Generation Stream Output + * Text Generation Stream Output. + * + * Auto-generated from TGI specs. + * For more details, check out + * https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts. */ export interface TextGenerationStreamOutput { - /** - * Generation details. Only available when the generation is finished. - */ - details?: TextGenerationStreamDetails; - /** - * The complete generated text. Only available when the generation is finished. - */ + details?: TextGenerationStreamOutputStreamDetails; generated_text?: string; - /** - * The token index within the stream. Optional to support older clients that omit it. - */ - index?: number; - /** - * Generated token. - */ - token: TextGenerationOutputToken; + index: number; + token: TextGenerationStreamOutputToken; + top_tokens?: TextGenerationStreamOutputToken[]; [property: string]: unknown; } -/** - * Generation details. Only available when the generation is finished. - */ -export interface TextGenerationStreamDetails { - /** - * The reason why the generation was stopped. 
- */ - finish_reason: TextGenerationFinishReason; - /** - * The number of generated tokens - */ +export interface TextGenerationStreamOutputStreamDetails { + finish_reason: TextGenerationOutputFinishReason; generated_tokens: number; - /** - * The random seed used for generation - */ - seed: number; + seed?: number; + [property: string]: unknown; +} + +export interface TextGenerationStreamOutputToken { + id: number; + logprob: number; + special: boolean; + text: string; [property: string]: unknown; } diff --git a/packages/tasks/src/tasks/text-generation/spec/input.json b/packages/tasks/src/tasks/text-generation/spec/input.json index e4c8e4f16..0742cefe0 100644 --- a/packages/tasks/src/tasks/text-generation/spec/input.json +++ b/packages/tasks/src/tasks/text-generation/spec/input.json @@ -1,94 +1,195 @@ { "$id": "/inference/schemas/text-generation/input.json", "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Inputs for Text Generation inference", + "description": "Text Generation Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "TextGenerationInput", "type": "object", + "required": ["inputs"], "properties": { "inputs": { - "description": "The text to initialize generation with", - "type": "string" + "type": "string", + "example": "My name is Olivier and I" }, "parameters": { - "description": "Additional inference parameters", - "$ref": "#/$defs/TextGenerationParameters" + "$ref": "#/$defs/TextGenerationInputGenerateParameters" }, "stream": { - "description": "Whether to stream output tokens", - "type": "boolean" + "type": "boolean", + "default": "false" } }, "$defs": { - "TextGenerationParameters": { - "title": "TextGenerationParameters", - "description": "Additional inference parameters for Text Generation", + "TextGenerationInputGenerateParameters": { "type": "object", "properties": { "best_of": { "type": "integer", - "description": "The number of sampling queries to run. Only the best one (in terms of total logprob) will be returned." + "default": "null", + "example": 1, + "nullable": true, + "minimum": 0, + "exclusiveMinimum": 0 }, "decoder_input_details": { "type": "boolean", - "description": "Whether or not to output decoder input details" + "default": "false" }, "details": { "type": "boolean", - "description": "Whether or not to output details" + "default": "true" }, "do_sample": { "type": "boolean", - "description": "Whether to use logits sampling instead of greedy decoding when generating new tokens." + "default": "false", + "example": true + }, + "frequency_penalty": { + "type": "number", + "format": "float", + "default": "null", + "example": 0.1, + "nullable": true, + "exclusiveMinimum": -2 + }, + "grammar": { + "allOf": [ + { + "$ref": "#/$defs/TextGenerationInputGrammarType" + } + ], + "default": "null", + "nullable": true }, "max_new_tokens": { "type": "integer", - "description": "The maximum number of tokens to generate." + "format": "int32", + "default": "100", + "example": "20", + "nullable": true, + "minimum": 0 }, "repetition_penalty": { "type": "number", - "description": "The parameter for repetition penalty. A value of 1.0 means no penalty. See [this paper](https://hf.co/papers/1909.05858) for more details." 
+ "format": "float", + "default": "null", + "example": 1.03, + "nullable": true, + "exclusiveMinimum": 0 }, "return_full_text": { "type": "boolean", - "description": "Whether to prepend the prompt to the generated text." + "default": "null", + "example": false, + "nullable": true }, "seed": { "type": "integer", - "description": "The random sampling seed." + "format": "int64", + "default": "null", + "example": "null", + "nullable": true, + "minimum": 0, + "exclusiveMinimum": 0 }, - "stop_sequences": { + "stop": { "type": "array", "items": { "type": "string" }, - "description": "Stop generating tokens if a member of `stop_sequences` is generated." + "example": ["photographer"], + "maxItems": 4 }, "temperature": { "type": "number", - "description": "The value used to modulate the logits distribution." + "format": "float", + "default": "null", + "example": 0.5, + "nullable": true, + "exclusiveMinimum": 0 }, "top_k": { "type": "integer", - "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering." + "format": "int32", + "default": "null", + "example": 10, + "nullable": true, + "exclusiveMinimum": 0 + }, + "top_n_tokens": { + "type": "integer", + "format": "int32", + "default": "null", + "example": 5, + "nullable": true, + "minimum": 0, + "exclusiveMinimum": 0 }, "top_p": { "type": "number", - "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation." + "format": "float", + "default": "null", + "example": 0.95, + "nullable": true, + "maximum": 1, + "exclusiveMinimum": 0 }, "truncate": { "type": "integer", - "description": "Truncate input tokens to the given size." + "default": "null", + "example": "null", + "nullable": true, + "minimum": 0 }, "typical_p": { "type": "number", - "description": "Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://hf.co/papers/2202.00666) for more information" + "format": "float", + "default": "null", + "example": 0.95, + "nullable": true, + "maximum": 1, + "exclusiveMinimum": 0 }, "watermark": { "type": "boolean", - "description": "Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)" + "default": "false", + "example": true } - } + }, + "title": "TextGenerationInputGenerateParameters" + }, + "TextGenerationInputGrammarType": { + "oneOf": [ + { + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["json"] + }, + "value": { + "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions." 
+ } + } + }, + { + "type": "object", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": ["regex"] + }, + "value": { + "type": "string" + } + } + } + ], + "discriminator": { + "propertyName": "type" + }, + "title": "TextGenerationInputGrammarType" } - }, - "required": ["inputs"] + } } diff --git a/packages/tasks/src/tasks/text-generation/spec/output.json b/packages/tasks/src/tasks/text-generation/spec/output.json index 467795c38..cb6ef3f99 100644 --- a/packages/tasks/src/tasks/text-generation/spec/output.json +++ b/packages/tasks/src/tasks/text-generation/spec/output.json @@ -1,165 +1,179 @@ { "$id": "/inference/schemas/text-generation/output.json", "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Outputs for Text Generation inference", + "description": "Text Generation Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", "title": "TextGenerationOutput", "type": "object", + "required": ["generated_text"], "properties": { + "details": { + "allOf": [ + { + "$ref": "#/$defs/TextGenerationOutputDetails" + } + ], + "nullable": true + }, "generated_text": { "type": "string", - "description": "The generated text" - }, - "details": { - "$ref": "#/$defs/Details", - "description": "When enabled, details about the generation" + "example": "test" } }, - "required": ["generated_text"], "$defs": { - "FinishReason": { - "type": "string", - "title": "TextGenerationFinishReason", - "description": "The reason why the generation was stopped.", - "oneOf": [ - { "const": "length", "description": "length: The generated sequence reached the maximum allowed length" }, - { "const": "eos_token", "description": "eos_token: The model generated an end-of-sentence (EOS) token" }, - { - "const": "stop_sequence", - "description": "stop_sequence: One of the sequence in stop_sequences was generated" - } - ] - }, - "PrefillToken": { - "title": "TextGenerationPrefillToken", + "TextGenerationOutputDetails": { "type": "object", + "required": ["finish_reason", "generated_tokens", "prefill", "tokens"], "properties": { - "id": { - "type": "integer" - }, - "logprob": { - "type": "number" - }, - "text": { - "type": "string", - "description": "The text associated with that token" - } - }, - "required": ["id", "logprob", "text"] - }, - "Token": { - "type": "object", - "title": "TextGenerationOutputToken", - "properties": { - "id": { - "type": "integer" - }, - "logprob": { - "type": "number" - }, - "special": { - "type": "boolean", - "description": "Whether or not that token is a special one" + "best_of_sequences": { + "type": "array", + "items": { + "$ref": "#/$defs/TextGenerationOutputBestOfSequence" + }, + "nullable": true }, - "text": { - "type": "string", - "description": "The text associated with that token" - } - }, - "required": ["id", "special", "text"] - }, - "Details": { - "type": "object", - "title": "TextGenerationOutputDetails", - "properties": { "finish_reason": { - "$ref": "#/$defs/FinishReason", - "description": "The reason why the generation was stopped." 
+ "$ref": "#/$defs/TextGenerationOutputFinishReason" }, "generated_tokens": { "type": "integer", - "description": "The number of generated tokens" + "format": "int32", + "example": 1, + "minimum": 0 }, "prefill": { "type": "array", "items": { - "$ref": "#/$defs/PrefillToken" + "$ref": "#/$defs/TextGenerationOutputPrefillToken" } }, "seed": { "type": "integer", - "description": "The random seed used for generation" + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 }, "tokens": { "type": "array", - "description": "The generated tokens and associated details", "items": { - "$ref": "#/$defs/Token" + "$ref": "#/$defs/TextGenerationOutputToken" } }, "top_tokens": { "type": "array", - "description": "Most likely tokens", "items": { "type": "array", "items": { - "$ref": "#/$defs/Token" + "$ref": "#/$defs/TextGenerationOutputToken" } } - }, - "best_of_sequences": { - "type": "array", - "description": "Details about additional sequences when best_of is provided", - "items": { - "$ref": "#/$defs/SequenceDetails" - } } }, - "required": ["finish_reason", "generated_tokens", "prefill", "tokens"] + "title": "TextGenerationOutputDetails" }, - "SequenceDetails": { + "TextGenerationOutputBestOfSequence": { "type": "object", - "title": "TextGenerationOutputSequenceDetails", + "required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"], "properties": { + "finish_reason": { + "$ref": "#/$defs/TextGenerationOutputFinishReason" + }, "generated_text": { "type": "string", - "description": "The generated text" - }, - "finish_reason": { - "$ref": "#/$defs/FinishReason" + "example": "test" }, "generated_tokens": { "type": "integer", - "description": "The number of generated tokens" + "format": "int32", + "example": 1, + "minimum": 0 }, "prefill": { "type": "array", "items": { - "$ref": "#/$defs/PrefillToken" + "$ref": "#/$defs/TextGenerationOutputPrefillToken" } }, "seed": { "type": "integer", - "description": "The random seed used for generation" + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 }, "tokens": { "type": "array", - "description": "The generated tokens and associated details", "items": { - "$ref": "#/$defs/Token" + "$ref": "#/$defs/TextGenerationOutputToken" } }, "top_tokens": { "type": "array", - "description": "Most likely tokens", "items": { "type": "array", "items": { - "$ref": "#/$defs/Token" + "$ref": "#/$defs/TextGenerationOutputToken" } } } }, - "required": ["generated_text", "finish_reason", "generated_tokens", "prefill", "tokens"] + "title": "TextGenerationOutputBestOfSequence" + }, + "TextGenerationOutputFinishReason": { + "type": "string", + "enum": ["length", "eos_token", "stop_sequence"], + "example": "Length", + "title": "TextGenerationOutputFinishReason" + }, + "TextGenerationOutputPrefillToken": { + "type": "object", + "required": ["id", "text", "logprob"], + "properties": { + "id": { + "type": "integer", + "format": "int32", + "example": 0, + "minimum": 0 + }, + "logprob": { + "type": "number", + "format": "float", + "example": -0.34, + "nullable": true + }, + "text": { + "type": "string", + "example": "test" + } + }, + "title": "TextGenerationOutputPrefillToken" + }, + "TextGenerationOutputToken": { + "type": "object", + "required": ["id", "text", "logprob", "special"], + "properties": { + "id": { + "type": "integer", + "format": "int32", + "example": 0, + "minimum": 0 + }, + "logprob": { + "type": "number", + "format": "float", + "example": -0.34, + "nullable": true + }, + "special": { + "type": "boolean", + 
"example": "false" + }, + "text": { + "type": "string", + "example": "test" + } + }, + "title": "TextGenerationOutputToken" } } } diff --git a/packages/tasks/src/tasks/text-generation/spec/output_stream.json b/packages/tasks/src/tasks/text-generation/spec/output_stream.json deleted file mode 100644 index 2c58ec042..000000000 --- a/packages/tasks/src/tasks/text-generation/spec/output_stream.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "$id": "/inference/schemas/text-generation/output.json", - "$schema": "http://json-schema.org/draft-06/schema#", - "description": "Text Generation Stream Output", - "title": "TextGenerationStreamOutput", - "type": "object", - "properties": { - "token": { - "$ref": "#/$defs/Token", - "description": "Generated token." - }, - "index": { - "type": "integer", - "description": "The token index within the stream. Optional to support older clients that omit it." - }, - "generated_text": { - "type": "string", - "description": "The complete generated text. Only available when the generation is finished." - }, - "details": { - "$ref": "#/$defs/StreamDetails", - "description": "Generation details. Only available when the generation is finished." - } - }, - "required": ["token"], - "$defs": { - "StreamDetails": { - "type": "object", - "title": "TextGenerationStreamDetails", - "properties": { - "finish_reason": { - "$ref": "#/$defs/FinishReason", - "description": "The reason why the generation was stopped." - }, - "generated_tokens": { - "type": "integer", - "description": "The number of generated tokens" - }, - "seed": { - "type": "integer", - "description": "The random seed used for generation" - } - }, - "required": ["finish_reason", "generated_tokens", "seed"] - } - } -} diff --git a/packages/tasks/src/tasks/text-generation/spec/stream_output.json b/packages/tasks/src/tasks/text-generation/spec/stream_output.json new file mode 100644 index 000000000..e1ef8a0dc --- /dev/null +++ b/packages/tasks/src/tasks/text-generation/spec/stream_output.json @@ -0,0 +1,97 @@ +{ + "$id": "/inference/schemas/text-generation/stream_output.json", + "$schema": "http://json-schema.org/draft-06/schema#", + "description": "Text Generation Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.", + "title": "TextGenerationStreamOutput", + "type": "object", + "required": ["index", "token"], + "properties": { + "details": { + "allOf": [ + { + "$ref": "#/$defs/TextGenerationStreamOutputStreamDetails" + } + ], + "default": "null", + "nullable": true + }, + "generated_text": { + "type": "string", + "default": "null", + "example": "test", + "nullable": true + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "token": { + "$ref": "#/$defs/TextGenerationStreamOutputToken" + }, + "top_tokens": { + "type": "array", + "items": { + "$ref": "#/$defs/TextGenerationStreamOutputToken" + } + } + }, + "$defs": { + "TextGenerationStreamOutputStreamDetails": { + "type": "object", + "required": ["finish_reason", "generated_tokens"], + "properties": { + "finish_reason": { + "$ref": "#/$defs/TextGenerationStreamOutputFinishReason" + }, + "generated_tokens": { + "type": "integer", + "format": "int32", + "example": 1, + "minimum": 0 + }, + "seed": { + "type": "integer", + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 + } + }, + "title": "TextGenerationStreamOutputStreamDetails" + }, + "TextGenerationStreamOutputFinishReason": { + "type": 
"string", + "enum": ["length", "eos_token", "stop_sequence"], + "example": "Length", + "title": "TextGenerationStreamOutputFinishReason" + }, + "TextGenerationStreamOutputToken": { + "type": "object", + "required": ["id", "text", "logprob", "special"], + "properties": { + "id": { + "type": "integer", + "format": "int32", + "example": 0, + "minimum": 0 + }, + "logprob": { + "type": "number", + "format": "float", + "example": -0.34, + "nullable": true + }, + "special": { + "type": "boolean", + "example": "false" + }, + "text": { + "type": "string", + "example": "test" + } + }, + "title": "TextGenerationStreamOutputToken" + } + } +} diff --git a/packages/tasks/src/tokenizer-data.ts b/packages/tasks/src/tokenizer-data.ts index b08926ccc..6be41e8f6 100644 --- a/packages/tasks/src/tokenizer-data.ts +++ b/packages/tasks/src/tokenizer-data.ts @@ -28,5 +28,5 @@ export type SpecialTokensMap = { */ export interface TokenizerConfig extends SpecialTokensMap { use_default_system_prompt?: boolean; - chat_template?: string; + chat_template?: string | Array<{ name: string; template: string }>; } diff --git a/packages/widgets/README.md b/packages/widgets/README.md index b304335e4..b64da7918 100644 --- a/packages/widgets/README.md +++ b/packages/widgets/README.md @@ -16,7 +16,14 @@ You can run the demo locally: ```console pnpm install -pnpm dev +pnpm dev --open +``` + +If you are submitting a PR, make sure that you run `format` & `lint` before submitting the PR: + +```console +pnpm format +pnpm lint ``` If you want to try the "Sign-in with HF" feature locally, you will need to https://huggingface.co/settings/applications/new an OAuth application with `"openid"`, `"profile"` and `"inference-api"` scopes and `http://localhost:5173/auth/callback/huggingface` as the redirect URL. @@ -29,3 +36,16 @@ OAUTH_CLIENT_SECRET=... ``` If you want to try the "Sign-in with HF" feature in a Space, you can just duplicate https://huggingface.co/spaces/huggingfacejs/inference-widgets, it should work out of the box thanks to the metadata in the `README.md` file. 
+ +## Testing for moon (for huggingface admins) + +```console +pnpm i +pnpm build +``` + +And then inside moon, run the following command for both `server` & `front`: + +```console +npm i --save @huggingface/widgets@ +``` diff --git a/packages/widgets/package.json b/packages/widgets/package.json index faa52cf99..c596da6bf 100644 --- a/packages/widgets/package.json +++ b/packages/widgets/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/widgets", "packageManager": "pnpm@8.10.5", - "version": "0.2.4", + "version": "0.2.9", "publishConfig": { "access": "public" }, diff --git a/packages/widgets/src/lib/components/Icons/IconHuggingFace.svelte b/packages/widgets/src/lib/components/Icons/IconHuggingFace.svelte new file mode 100644 index 000000000..325aec971 --- /dev/null +++ b/packages/widgets/src/lib/components/Icons/IconHuggingFace.svelte @@ -0,0 +1,56 @@ + + + diff --git a/packages/widgets/src/lib/components/InferenceWidget/InferenceWidget.svelte b/packages/widgets/src/lib/components/InferenceWidget/InferenceWidget.svelte index af923a5fa..493064b6d 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/InferenceWidget.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/InferenceWidget.svelte @@ -28,6 +28,7 @@ import ZeroShotImageClassificationWidget from "./widgets/ZeroShotImageClassificationWidget/ZeroShotImageClassificationWidget.svelte"; import type { WidgetType } from "@huggingface/tasks"; import WidgetInfo from "./shared/WidgetInfo/WidgetInfo.svelte"; + import { isLoggedIn as isLoggedInStore } from "./stores.js"; export let apiToken: WidgetProps["apiToken"] = undefined; export let callApiOnMount = false; @@ -85,6 +86,8 @@ ? WIDGET_COMPONENTS[model.pipeline_tag as keyof typeof WIDGET_COMPONENTS] : undefined; + $isLoggedInStore = isLoggedIn; + // prettier-ignore $: widgetProps = ({ apiToken, diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetAddSentenceBtn/WidgetAddSentenceBtn.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetAddSentenceBtn/WidgetAddSentenceBtn.svelte index 7eca76ae4..3495b0abd 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetAddSentenceBtn/WidgetAddSentenceBtn.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetAddSentenceBtn/WidgetAddSentenceBtn.svelte @@ -1,12 +1,11 @@ {#if !isDisabled} - {/if} diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetDropzone/WidgetDropzone.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetDropzone/WidgetDropzone.svelte index bcf052d26..c936d2240 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetDropzone/WidgetDropzone.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetDropzone/WidgetDropzone.svelte @@ -1,6 +1,9 @@ @@ -54,36 +64,43 @@ style="display: none;" type="file" /> - -
{ - fileInput.click(); - }} - on:dragenter={() => { - isDragging = true; - }} - on:dragleave={() => { - isDragging = false; - }} - on:dragover|preventDefault - on:drop|preventDefault={onDrop} -> - {#if !imgSrc && !isDisabled} - {label} - {:else} -
- -
- {/if} - {#if isLoading} -
- -
- {/if} -
+ on:click={() => { + if (!$isLoggedIn) { + popOverOpen = true; + return; + } + fileInput.click(); + }} + on:dragenter={() => { + isDragging = true; + }} + on:dragleave={() => { + isDragging = false; + }} + on:dragover|preventDefault + on:drop|preventDefault={onDrop} + > + {#if !imgSrc && !isDisabled} + {label} + {:else} +
+ +
+ {/if} + {#if isLoading} +
+ +
+ {/if} + + diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFileInput/WidgetFileInput.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFileInput/WidgetFileInput.svelte index c278b287e..3ff740aa9 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFileInput/WidgetFileInput.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFileInput/WidgetFileInput.svelte @@ -1,56 +1,74 @@ {#if !isDisabled} -
{ - isDragging = true; - }} - on:dragover|preventDefault - on:dragleave={() => { - isDragging = false; - }} - on:drop|preventDefault={(e) => { - isDragging = false; - fileInput.files = e.dataTransfer?.files ?? null; - onChange(); - }} - > - -
+ + + {/if} diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFooter/WidgetFooter.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFooter/WidgetFooter.svelte index f090b7eb4..d8b3c9baa 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFooter/WidgetFooter.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetFooter/WidgetFooter.svelte @@ -1,9 +1,9 @@
@@ -28,18 +29,26 @@ JSON Output {/if} - + + +
{#if outputJson && isOutputJsonVisible}
+	import { createEventDispatcher } from "svelte";
 	import { onCmdEnter } from "../../../../utils/ViewUtils.js";
 	import WidgetSubmitBtn from "../WidgetSubmitBtn/WidgetSubmitBtn.svelte";
+	import { isLoggedIn } from "../../stores.js";
+	import LogInPopover from "../../../LogInPopover/LogInPopover.svelte";
 
 	export let flatTop = false;
 	export let isLoading: boolean;
 	export let isDisabled = false;
-	export let onClickSubmitBtn: (e?: MouseEvent) => void;
 	export let placeholder = "Your sentence here...";
 	export let submitButtonLabel: string | undefined = undefined;
 	export let value: string = "";
+
+	let popOverOpen = false;
+
+	const dispatch = createEventDispatcher<{ cmdEnter: void }>();
 
 
-
- - -
+ +
+ { + if (!$isLoggedIn) { + popOverOpen = true; + return; + } + dispatch("cmdEnter"); + }} + /> + (popOverOpen = true)} + /> +
+
diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetRealtimeRecorder/WidgetRealtimeRecorder.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetRealtimeRecorder/WidgetRealtimeRecorder.svelte index be240045b..d30136d34 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetRealtimeRecorder/WidgetRealtimeRecorder.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetRealtimeRecorder/WidgetRealtimeRecorder.svelte @@ -1,6 +1,6 @@ {#if !isDisabled} - + + + {/if} diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTableInput/WidgetTableInput.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTableInput/WidgetTableInput.svelte index 43e540c84..b513c4ab7 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTableInput/WidgetTableInput.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTableInput/WidgetTableInput.svelte @@ -1,20 +1,23 @@ diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextInput/WidgetTextInput.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextInput/WidgetTextInput.svelte index 72fb7082f..2670e1ef7 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextInput/WidgetTextInput.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextInput/WidgetTextInput.svelte @@ -1,24 +1,39 @@ - - - - - + + + + { + if (!$isLoggedIn) { + popOverOpen = true; + return; + } + dispatch("cmdEnter"); + }} + /> + + + diff --git a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextarea/WidgetTextarea.svelte b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextarea/WidgetTextarea.svelte index 5c088b50e..cfd3946c8 100644 --- a/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextarea/WidgetTextarea.svelte +++ b/packages/widgets/src/lib/components/InferenceWidget/shared/WidgetTextarea/WidgetTextarea.svelte @@ -1,8 +1,10 @@ - - - - (isOnFocus = false)} - /> - - + + + + + { + if (!$isLoggedIn) { + popOverOpen = true; + return; + } + dispatch("cmdEnter"); + }} + bind:this={containerSpanEl} + on:paste|preventDefault={handlePaste} + on:input={updateInnerTextValue} + on:focus={onFocus} + on:blur={() => (isOnFocus = false)} + /> + + +
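Taken together, the regenerated `@huggingface/tasks` types in this diff change the shape that downstream code sees when consuming a streamed chat completion. The sketch below is illustrative only and is not part of the patch: it assumes the `ChatCompletionStreamOutput` type exported from `@huggingface/tasks` (as added to `packages/tasks/src/tasks/index.ts` above) and deliberately leaves out how the chunks are produced (SSE parsing or a client library).

```ts
import type { ChatCompletionStreamOutput } from "@huggingface/tasks";

// Accumulate the assistant text from an already-parsed sequence of stream chunks.
// In the regenerated schema `delta.content` is optional (tool-call chunks may omit it),
// so missing content is treated as an empty string.
function collectStreamedText(chunks: ChatCompletionStreamOutput[]): string {
	let text = "";
	for (const chunk of chunks) {
		for (const choice of chunk.choices) {
			text += choice.delta.content ?? "";
			if (choice.finish_reason) {
				// `finish_reason` is only set on the terminating chunk.
				return text;
			}
		}
	}
	return text;
}
```

The same pattern applies to `TextGenerationStreamOutput`, whose `token` and optional `details` fields are likewise regenerated from the TGI spec in this diff.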