Skip to content

Commit

Permalink
Merge branch 'main' into enable-messages-api
Browse files Browse the repository at this point in the history
  • Loading branch information
radames authored May 1, 2024
2 parents eec9f25 + 779c9f1 commit a97d7f7
Show file tree
Hide file tree
Showing 90 changed files with 2,558 additions and 981 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ jobs:
deno-version: vx.x.x
- name: E2E test - deno import from npm
working-directory: e2e/deno
run: deno run --allow-net index.ts
run: deno run --allow-net --allow-env=HF_TOKEN index.ts
env:
NPM_CONFIG_REGISTRY: http://localhost:4874/
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4 changes: 3 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@
"json.format.enable": false,
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
},
"cSpell.words": ["huggingface"],
"deno.enablePaths": ["./e2e/deno"]
}
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or

```html
<script type="module">
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@2.6.6/+esm';
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.14.6/+esm";
import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@2.6.7/+esm';
import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@0.14.10/+esm";
</script>
```

Expand Down
12 changes: 11 additions & 1 deletion e2e/deno/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import { HfInference } from "npm:@huggingface/inference@*";
import { whoAmI, listFiles } from "npm:@huggingface/hub@*";

const hf = new HfInference();
const token = Deno.env.get("HF_TOKEN");

if (!token) {
console.error("Please set the HF_TOKEN environment variable.");
Deno.exit(1);
}

const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
console.log(info);
Expand All @@ -10,6 +15,11 @@ for await (const file of listFiles({ credentials: { accessToken: "hf_hub.js" },
console.log(file);
}

const hf = new HfInference(token);

const tokenInfo = await whoAmI({ credentials: { accessToken: token } });
console.log(tokenInfo);

const sum = await hf.summarization({
model: "google/pegasus-xsum",
inputs:
Expand Down
1 change: 1 addition & 0 deletions e2e/ts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock.json
6 changes: 4 additions & 2 deletions packages/gguf/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@huggingface/gguf",
"packageManager": "pnpm@8.10.5",
"version": "0.0.10",
"version": "0.1.2",
"description": "a GGUF parser that works on remotely hosted files",
"repository": "https://github.com/huggingface/huggingface.js.git",
"publishConfig": {
Expand Down Expand Up @@ -47,5 +47,7 @@
],
"author": "Hugging Face",
"license": "MIT",
"devDependencies": {}
"devDependencies": {
"type-fest": "^3.9.0"
}
}
12 changes: 12 additions & 0 deletions packages/gguf/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import { GGMLQuantizationType, gguf } from "./gguf";
import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
const URL_MISTRAL_7B =
Expand All @@ -9,6 +9,8 @@ const URL_BIG_ENDIAN =
"https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf";
const URL_V1 =
"https://huggingface.co/tmadge/testing/resolve/66c078028d1ff92d7a9264a1590bc61ba6437933/tinyllamas-stories-260k-f32.gguf";
const URL_SHARDED_GROK =
"https://huggingface.co/Arki05/Grok-1-GGUF/resolve/ecafa8d8eca9b8cd75d11a0d08d3a6199dc5a068/grok-1-IQ3_XS-split-00001-of-00009.gguf";

describe("gguf", () => {
it("should parse a llama2 7b", async () => {
Expand Down Expand Up @@ -220,4 +222,23 @@ describe("gguf", () => {
dtype: GGMLQuantizationType.F32,
});
});

it("should detect sharded gguf filename", async () => {
const ggufPath = "grok-1/grok-1-q4_0-00003-of-00009.gguf"; // https://huggingface.co/ggml-org/models/blob/fcf344adb9686474c70e74dd5e55465e9e6176ef/grok-1/grok-1-q4_0-00003-of-00009.gguf
const ggufShardFileInfo = parseGgufShardFilename(ggufPath);

expect(ggufShardFileInfo?.prefix).toEqual("grok-1/grok-1-q4_0");
expect(ggufShardFileInfo?.shard).toEqual("00003");
expect(ggufShardFileInfo?.total).toEqual("00009");
});

it("should get param count for llama2 7b", async () => {
const { parameterCount } = await gguf(URL_LLAMA, { computeParametersCount: true });
expect(parameterCount).toEqual(6_738_415_616); // 7B
});

it("should get param count for sharded gguf", async () => {
const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK);
expect(parameterCount).toEqual(316_490_127_360); // 316B
});
});
124 changes: 116 additions & 8 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,31 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { promisesQueue } from "./utils/promisesQueue";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
export { GGUFValueType, GGMLQuantizationType } from "./types";
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";

/** Matches any filename ending in `.gguf`. */
export const RE_GGUF_FILE = /\.gguf$/;
/** Matches sharded GGUF filenames of the form `<prefix>-<shard>-of-<total>.gguf`. */
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;

/** Components of a sharded GGUF filename: `<prefix>-<shard>-of-<total>.gguf`. */
export interface GgufShardFileInfo {
	prefix: string;
	shard: string;
	total: string;
}

/**
 * Split a sharded GGUF filename into its prefix, shard index and shard total.
 *
 * @param filename - a path or URL ending in a GGUF filename
 * @returns the parsed parts, or `null` when the name does not follow the shard naming scheme
 */
export function parseGgufShardFilename(filename: string): GgufShardFileInfo | null {
	const groups = RE_GGUF_SHARD_FILE.exec(filename)?.groups;
	if (!groups) {
		return null;
	}
	return {
		prefix: groups["prefix"],
		shard: groups["shard"],
		total: groups["total"],
	};
}

const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;

Expand All @@ -30,8 +51,11 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB
class RangeView {
private chunk: number;
private buffer: ArrayBuffer;
private dataView: DataView;

readonly view: DataView;
get view(): DataView {
return this.dataView;
}

constructor(
public url: string,
Expand All @@ -47,7 +71,7 @@ class RangeView {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
this.buffer = new ArrayBuffer(0, { maxByteLength: HTTP_TOTAL_MAX_SIZE });
this.view = new DataView(this.buffer);
this.dataView = new DataView(this.buffer);
}
/**
* Fetch a new chunk from the server
Expand All @@ -63,18 +87,40 @@ class RangeView {
})
).arrayBuffer()
);
this.appendBuffer(buf);
this.chunk += 1;
}
/**
* Append new data into the buffer
*/
appendBuffer(buf: Uint8Array) {
/// TODO(fix typing)
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE);
new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE);
this.chunk += 1;
if (ArrayBuffer.prototype.resize) {
/// TODO(fix typing)
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
this.buffer.resize((this.chunk + 1) * HTTP_CHUNK_SIZE);
new Uint8Array(this.buffer).set(buf, this.chunk * HTTP_CHUNK_SIZE);
} else {
// If the browser does not support ArrayBuffer.resize, we fallback to this polyfill version
/// TODO(fix typing)
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
const newBuffer = new ArrayBuffer((this.chunk + 1) * HTTP_CHUNK_SIZE, { maxByteLength: HTTP_TOTAL_MAX_SIZE });
const arrView = new Uint8Array(newBuffer);
arrView.set(new Uint8Array(this.buffer));
arrView.set(buf, this.chunk * HTTP_CHUNK_SIZE);
this.buffer = newBuffer;
this.dataView = new DataView(this.buffer);
}
}
/**
* Check whether we need to fetch a new chunk
*/
async fetchChunkIfNeeded(offset: number) {
if (this.view.byteLength - offset < HTTP_DATA_LEEWAY) {
if (this.dataView.byteLength - offset < HTTP_DATA_LEEWAY) {
await this.fetchChunk();
}
}
Expand Down Expand Up @@ -156,6 +202,16 @@ function readMetadataValue(
}
}

export async function gguf(
url: string,
params: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
computeParametersCount: true;
}
): Promise<GGUFParseOutput & { parameterCount: number }>;
export async function gguf(
url: string,
params?: {
Expand All @@ -164,7 +220,17 @@ export async function gguf(
*/
fetch?: typeof fetch;
}
): Promise<GGUFParseOutput> {
): Promise<GGUFParseOutput>;
export async function gguf(
url: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
computeParametersCount?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount?: number }> {
const r = new RangeView(url, params);
await r.fetchChunk();

Expand Down Expand Up @@ -273,5 +339,47 @@ export async function gguf(
});
}

return { metadata, tensorInfos };
if (params?.computeParametersCount) {
const parameterCount = tensorInfos
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
.reduce((acc, val) => acc + val, 0);

return { metadata, tensorInfos, parameterCount };
} else {
return { metadata, tensorInfos };
}
}

/**
 * Parse a (possibly sharded) GGUF model and return every shard's parse output
 * plus the total parameter count across shards.
 *
 * If `url` names a shard (`…-00001-of-00009.gguf`), the URLs of all sibling
 * shards are reconstructed from the parsed prefix/total and fetched with
 * bounded concurrency; otherwise the single file is parsed on its own.
 *
 * @param url - URL of a GGUF file, or of any shard of a sharded GGUF model
 * @returns the parse outputs of all shards and the summed parameter count
 */
export async function ggufAllShards(
	url: string,
	params?: {
		/**
		 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
		 */
		fetch?: typeof fetch;
	}
): Promise<{ shards: GGUFParseOutput[]; parameterCount: number }> {
	const ggufShardFileInfo = parseGgufShardFilename(url);
	if (ggufShardFileInfo) {
		const total = parseInt(ggufShardFileInfo.total, 10);
		const prefix = ggufShardFileInfo.prefix;

		// Rebuild the URL of every shard (shard indices are 1-based, zero-padded to 5 digits).
		const urls: string[] = [];
		for (let shardIdx = 1; shardIdx <= total; shardIdx++) {
			urls.push(`${prefix}-${shardIdx.toString().padStart(5, "0")}-of-${total.toString().padStart(5, "0")}.gguf`);
		}

		const PARALLEL_DOWNLOADS = 20;
		const shards = await promisesQueue(
			// Forward params (notably a caller-supplied custom fetch) to each per-shard parse;
			// previously only computeParametersCount was passed, silently dropping params.fetch.
			urls.map((shardUrl) => () => gguf(shardUrl, { ...params, computeParametersCount: true })),
			PARALLEL_DOWNLOADS
		);
		return {
			shards,
			parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0),
		};
	} else {
		const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true });
		return { shards: [{ metadata, tensorInfos }], parameterCount };
	}
}
Loading

0 comments on commit a97d7f7

Please sign in to comment.