// Patch summary: expose the offset at which tensor data starts in a GGUF file.
//
// - gguf.spec.ts: adds the test "calculate tensor data offset", which calls
//   gguf(URL_LLAMA) and expects `tensorDataOffset` to equal 741056n.
// - gguf.ts: adds GGUF_DEFAULT_ALIGNMENT (32) and GGML_PAD(x, n), both marked
//   in the code as "defined in ggml.h". gguf() reads metadata["general.alignment"],
//   falls back to the default via `??`, converts it with Number(), pads `offset`
//   with GGML_PAD and returns the result as a bigint `tensorDataOffset` from both
//   return branches. The ggufAllShards() branch that calls gguf(url, ...) directly
//   forwards the value into its single shard entry.
// - types.ts: GGUFParseOutput gains the required field `tensorDataOffset: bigint`.
//
// NOTE(review): `offset` is defined outside the visible hunk — presumably the
//   byte position reached after parsing the tensor infos; confirm in gguf().
// NOTE(review): GGML_PAD uses `&` and `~`, which operate on 32-bit integers in
//   JavaScript, so the result would wrap once x + n - 1 reaches 2^31. The mask
//   trick also only rounds up correctly when n is a power of two, and
//   `general.alignment` is taken from the file's metadata without validation in
//   the visible code. Whether such inputs occur in practice cannot be told from
//   here — TODO confirm, or guard/validate the alignment.
// NOTE(review): the RE_GGUF_SHARD_FILE context line below shows `(?` directly
//   followed by `.*?` / `\d{5}`, which is not valid regex syntax; this looks like
//   named-group identifiers lost during extraction — verify against the real file.
// NOTE(review): this copy of the patch has its line breaks collapsed into
//   spaces (hunk headers and hunk content share a line), so it presumably cannot
//   be applied as-is.
diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 58852dbee..eb34dd199 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -283,4 +283,9 @@ describe("gguf", () => { expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4 }); + + it("calculate tensor data offset", async () => { + const { tensorDataOffset } = await gguf(URL_LLAMA); + expect(tensorDataOffset).toEqual(741056n); + }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index d83b405da..8a8dd442d 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -10,6 +10,8 @@ export { parseGGUFQuantLabel, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL } from "@huggi export const RE_GGUF_FILE = /\.gguf$/; export const RE_GGUF_SHARD_FILE = /^(?.*?)-(?\d{5})-of-(?\d{5})\.gguf$/; +const GGUF_DEFAULT_ALIGNMENT = 32; // defined in ggml.h +const GGML_PAD = (x: number, n: number) => (x + n - 1) & ~(n - 1); // defined in ggml.h const PARALLEL_DOWNLOADS = 20; export interface GgufShardFileInfo { @@ -384,14 +386,18 @@ export async function gguf( }); } + // calculate absolute offset of tensor data + const alignment: number = Number(metadata["general.alignment"] ?? 
GGUF_DEFAULT_ALIGNMENT); + const tensorDataOffset = BigInt(GGML_PAD(offset, alignment)); + if (params?.computeParametersCount) { const parameterCount = tensorInfos .map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1)) .reduce((acc, val) => acc + val, 0); - return { metadata, tensorInfos, parameterCount }; + return { metadata, tensorInfos, tensorDataOffset, parameterCount }; } else { - return { metadata, tensorInfos }; + return { metadata, tensorInfos, tensorDataOffset }; } } @@ -429,7 +435,10 @@ export async function ggufAllShards( parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0), }; } else { - const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true }); - return { shards: [{ metadata, tensorInfos }], parameterCount }; + const { metadata, tensorInfos, tensorDataOffset, parameterCount } = await gguf(url, { + ...params, + computeParametersCount: true, + }); + return { shards: [{ metadata, tensorInfos, tensorDataOffset }], parameterCount }; } } diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 4a6b40e16..aba842447 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -155,4 +155,5 @@ export interface GGUFTensorInfo { export interface GGUFParseOutput { metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; + tensorDataOffset: bigint; }