From 97060303d0d6f87722abf700ac41de5909b66832 Mon Sep 17 00:00:00 2001 From: drazdra <133811709+drazdra@users.noreply.github.com> Date: Sun, 29 Dec 2024 22:18:09 +0600 Subject: [PATCH] gguf: calculate tensor data offset (#1076) Add the offset of the tensor data within the file. The tensor offsets stored in the metadata are relative to this offset, which is itself absent from the metadata. With this field it is possible to actually access layers in a GGUF file; without it, the offsets shown are useless. --------- Co-authored-by: Xuan Son Nguyen Co-authored-by: Julien Chaumond --- packages/gguf/src/gguf.spec.ts | 5 +++++ packages/gguf/src/gguf.ts | 17 +++++++++++++---- packages/gguf/src/types.ts | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 58852dbee..eb34dd199 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -283,4 +283,9 @@ describe("gguf", () => { expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4 }); + + it("calculate tensor data offset", async () => { + const { tensorDataOffset } = await gguf(URL_LLAMA); + expect(tensorDataOffset).toEqual(741056n); + }); }); diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index d83b405da..8a8dd442d 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -10,6 +10,8 @@ export { parseGGUFQuantLabel, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL } from "@huggi export const RE_GGUF_FILE = /\.gguf$/; export const RE_GGUF_SHARD_FILE = /^(?.*?)-(?\d{5})-of-(?\d{5})\.gguf$/; +const GGUF_DEFAULT_ALIGNMENT = 32; // defined in ggml.h +const GGML_PAD = (x: number, n: number) => (x + n - 1) & ~(n - 1); // defined in ggml.h const PARALLEL_DOWNLOADS = 20; export interface GgufShardFileInfo { @@ -384,14 +386,18 @@ export async function gguf( }); } + // calculate absolute offset 
of tensor data + const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT); + const tensorDataOffset = BigInt(GGML_PAD(offset, alignment)); + if (params?.computeParametersCount) { const parameterCount = tensorInfos .map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1)) .reduce((acc, val) => acc + val, 0); - return { metadata, tensorInfos, parameterCount }; + return { metadata, tensorInfos, tensorDataOffset, parameterCount }; } else { - return { metadata, tensorInfos }; + return { metadata, tensorInfos, tensorDataOffset }; } } @@ -429,7 +435,10 @@ export async function ggufAllShards( parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0), }; } else { - const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true }); - return { shards: [{ metadata, tensorInfos }], parameterCount }; + const { metadata, tensorInfos, tensorDataOffset, parameterCount } = await gguf(url, { + ...params, + computeParametersCount: true, + }); + return { shards: [{ metadata, tensorInfos, tensorDataOffset }], parameterCount }; } } diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 4a6b40e16..aba842447 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -155,4 +155,5 @@ export interface GGUFTensorInfo { export interface GGUFParseOutput { metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; + tensorDataOffset: bigint; }