From 6f818ebd1dd1588e054db71e71133ef4fa276d1e Mon Sep 17 00:00:00 2001
From: Daniel Campagnoli
Date: Fri, 6 Dec 2024 19:22:18 +0800
Subject: [PATCH] Download tiktokenizer model in prod Dockerfile

Run a token count during the docker build so the tiktoken model is
downloaded into the image. The init script exits non-zero when the
download fails so the build step fails instead of silently succeeding.
---
 Dockerfile              |  2 ++
 package.json            |  1 +
 src/initTiktokenizer.ts | 12 ++++++++++++
 3 files changed, 15 insertions(+)
 create mode 100644 src/initTiktokenizer.ts

diff --git a/Dockerfile b/Dockerfile
index 07d7faf4..ef366332 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,6 +30,8 @@ USER $user
 RUN mkdir .sophia
 # Generate the function schemas
 RUN npm run functionSchemas
+# Download the tiktokenizer model
+RUN npm run initTiktokenizer
 
 ENV NODE_ENV=production
 ENV PORT=8080
diff --git a/package.json b/package.json
index c2ba8201..fe0ddb12 100644
--- a/package.json
+++ b/package.json
@@ -30,6 +30,7 @@
     "watch": " node --env-file=variables/local.env -r ts-node/register src/cli/watch.ts",
     "build": " tsc",
     "build:dev": "tsc -project tsconfig.swc.json",
+    "initTiktokenizer": "node --env-file=variables/local.env -r ts-node/register src/initTiktokenizer.ts",
     "functionSchemas": "node --env-file=variables/local.env -r ts-node/register src/generateFunctionSchemas.ts",
     "start": " node -r ts-node/register src/index.ts",
     "start:local": "node -r ts-node/register --env-file=variables/local.env --inspect=0.0.0.0:9229 src/index.ts",
diff --git a/src/initTiktokenizer.ts b/src/initTiktokenizer.ts
new file mode 100644
index 00000000..6f3d8844
--- /dev/null
+++ b/src/initTiktokenizer.ts
@@ -0,0 +1,12 @@
+import { countTokens } from '#llm/tokens';
+
+// node_modules is read-only for the sophia user in prod, so download in the docker build.
+// Counting the tokens of a throwaway string is what triggers the model download.
+countTokens('hi')
+  .then(() => console.log('Done'))
+  .catch((err) => {
+    console.error('Failed to download tiktoken model');
+    console.error(err);
+    // A handled rejection would otherwise exit 0 and let the docker build pass without the model
+    process.exitCode = 1;
+  });