-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: publishing ritual-net/prime v0.1.0
- Loading branch information
0 parents
commit 36fc288
Showing
119 changed files
with
15,722 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. | ||
|
||
# dependencies | ||
/node_modules | ||
/.pnp | ||
.pnp.js | ||
|
||
# testing | ||
/coverage | ||
|
||
# next.js | ||
/.next/ | ||
/out/ | ||
|
||
# production | ||
/build | ||
|
||
# misc | ||
.DS_Store | ||
*.pem | ||
|
||
# debug | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
|
||
# vercel | ||
.vercel | ||
|
||
# VSCode workspace profile | ||
.vscode/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
NEXTAUTH_URL="http://localhost:3000" | ||
NEXTAUTH_SECRET="aT0cELwZnDwbY/6TO6sR4i90aEAFuJMbVw4/stfAtWI=" | ||
|
||
# Database for Prime | ||
DATABASE_URL="postgresql://postgres:postgres@database/postgres" | ||
|
||
# Email server for passwordless authentication | ||
EMAIL_SERVER_USER="email-user" | ||
EMAIL_SERVER_PASSWORD="email-pass" | ||
EMAIL_SERVER_HOST="smtp" | ||
EMAIL_SERVER_PORT="1025" | ||
EMAIL_FROM="admin@ritual.com" | ||
|
||
# Database for forwarding logs of TGI servers | ||
DB_HOST="xxxxxxxx.yyyyyyyy.us-east-2.rds.amazonaws.com" | ||
DB_PORT=5432 | ||
DB_USER="db-user" | ||
DB_PASS="db-password" | ||
DB_NAME="db-name" | ||
|
||
# For pulling TGI from DockerHub | ||
DOCKERHUB_USER=ritual | ||
DOCKERHUB_TGI_IMAGE_TAG=tgi:1.1.0 | ||
|
||
# (optional) For HuggingFace private model deployment | ||
HF_API_KEY="HuggingFace-API-Key" | ||
HF_ORG_NAME="Org-name" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"extends": ["next/core-web-vitals", "prettier"] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. | ||
|
||
# dependencies | ||
/node_modules | ||
/.pnp | ||
.pnp.js | ||
|
||
# testing | ||
/coverage | ||
|
||
# next.js | ||
/.next/ | ||
/out/ | ||
|
||
# production | ||
/build | ||
|
||
# misc | ||
.DS_Store | ||
*.pem | ||
|
||
# debug | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
|
||
# local env files | ||
.env*.local | ||
|
||
# vercel | ||
.vercel | ||
|
||
# typescript | ||
*.tsbuildinfo | ||
next-env.d.ts | ||
|
||
# Env variables | ||
.env | ||
.env.local | ||
|
||
# VSCode workspace profile | ||
.vscode/ | ||
|
||
# Prisma | ||
migrations/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/usr/bin/env sh | ||
. "$(dirname -- "$0")/_/husky.sh" | ||
|
||
npx lint-staged |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Defaults | ||
**/.git | ||
**/.svn | ||
**/.hg | ||
**/node_modules | ||
|
||
# NextJS | ||
.next |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"tabWidth": 2, | ||
"useTabs": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
FROM node:18-alpine as base | ||
|
||
# Setup pnpm | ||
RUN apk add --no-cache libc6-compat | ||
RUN corepack enable && corepack prepare pnpm@8.7.5 --activate | ||
|
||
# Install dependencies only when needed | ||
FROM base as deps | ||
|
||
# Setup directory | ||
WORKDIR /app | ||
|
||
# Install dependencies | ||
COPY package.json pnpm-lock.yaml* ./ | ||
RUN pnpm i --frozen-lockfile | ||
|
||
# Rebuild source code as needed | ||
FROM base as builder | ||
WORKDIR /app | ||
COPY --from=deps /app/node_modules ./node_modules | ||
COPY . . | ||
COPY .env.sample .env | ||
RUN npx prisma generate | ||
RUN pnpm run build | ||
|
||
# Runner | ||
FROM base as runner | ||
WORKDIR /app | ||
ENV NODE_ENV production | ||
ENV NEXT_TELEMETRY_DISABLED 1 | ||
|
||
RUN addgroup --system --gid 1001 nodejs | ||
RUN adduser --system --uid 1001 nextjs | ||
|
||
COPY --from=builder /app/public ./public | ||
|
||
# Automatically leverage output traces to reduce image size | ||
# https://nextjs.org/docs/advanced-features/output-file-tracing | ||
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ | ||
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static | ||
|
||
# Run | ||
USER nextjs | ||
EXPOSE 3000 | ||
ENV PORT 3000 | ||
CMD ["node", "server.js"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
The Clear BSD License | ||
|
||
Copyright (c) 2023 Origin Research Ltd | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without modification, | ||
are permitted (subject to the limitations in the disclaimer below) provided that | ||
the following conditions are met: | ||
|
||
* Redistributions of source code must retain the above copyright notice, | ||
this list of conditions and the following disclaimer. | ||
|
||
* Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
|
||
* Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from this | ||
software without specific prior written permission. | ||
|
||
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY | ||
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND | ||
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR | ||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER | ||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
<p align="center"> | ||
<h1 align="center">Prime</h1> | ||
</p> | ||
<p align="center"> | ||
<b><a href="https://github.com/ritual-net/prime#about">About</a></b> | ||
| | ||
<b><a href="https://github.com/ritual-net/prime#architecture">Architecture</a></b> | ||
| | ||
<b><a href="https://github.com/ritual-net/prime#usage">Usage</a></b> | ||
| | ||
<b><a href="https://github.com/ritual-net/prime#customization">Customization</a></b> | ||
</p> | ||
|
||
## About | ||
|
||
Prime is a one-click toolkit for provisioning servers to deploy and serve Large Language Models (LLMs). | ||
|
||
- Cloud provider agnostic for flexible deployments | ||
- Fine-grained access controls with ACL permission system | ||
- Out-of-the-box inference playground support with client-side PII redaction | ||
- Inference engine powered by [text-generation-inference](https://github.com/huggingface/text-generation-inference) | ||
|
||
## Architecture | ||
|
||
Generally, the idea behind Prime is simple—to act as a managed interface that connects to other inference-providing solutions (a la Paperspace, CoreWeave, Fluidstack, etc.). | ||
|
||
### Technology | ||
|
||
- TypeScript | ||
- Frontend: | ||
- [NextJS](https://nextjs.org/) as React framework | ||
- [TailwindCSS](https://tailwindcss.com/) as CSS framework | ||
- [shadcn/ui](https://ui.shadcn.com/) as component library | ||
- Backend: | ||
- [NextJS serverless functions](https://vercel.com/docs/concepts/functions/serverless-functions) | ||
- [Prisma](https://prisma.io) as database ORM connecting to Postgres | ||
|
||
### System | ||
|
||
1. Users authenticate via [passwordless email magic links](./pages/api/auth/[...nextauth].ts) | ||
2. Users are approved by admin users | ||
3. There is a hierarchy of [permission ACLs](./utils/auth.ts) | ||
4. Users can create inference servers across any ML inference provider that is implemented ([extending BaseProvider](./ml/base.ts)) | ||
a. Currently, only [Paperspace](#paperspace) is supported. | ||
|
||
### Supported cloud providers | ||
|
||
#### Paperspace | ||
|
||
[Paperspace](https://www.paperspace.com/) is the only cloud GPU provider currently supported out-of-the-box. | ||
|
||
##### Account | ||
|
||
To use Paperspace, you will need an account (email, password) and API key. Note that: | ||
|
||
1. **Credentials for each ML provider are shared across all users of a Prime server.** In other words, each user cannot use their individual account to deploy machines; only one account is used. **Therefore, we highly recommend you only add users you trust.** | ||
2. Paperspace restricts the amount and type of machines a new account is allowed to deploy. Therefore, you will have to [request access and limit increases](https://docs.paperspace.com/core/quota-limits/) for the types of machines you want to deploy, depending on your use case. | ||
|
||
## Usage | ||
|
||
### Environment setup | ||
|
||
See [.env.sample](.env.sample) for setting up your environment variables correctly. Put your variables in a file named `.env`. | ||
|
||
```bash | ||
cp .env.sample .env | ||
vim .env | ||
``` | ||
|
||
**For running Prime, you will need:** (skip if using Docker) | ||
|
||
- A Postgres database for Prime (or use the test one, see [Run without Docker](#run-without-docker)) | ||
- SendGrid credentials (or use the test ones, see [Run with Docker](#run-with-docker)) | ||
|
||
**For deploying TGI to Paperspace, you will need:** | ||
|
||
- A Postgres database for TGI logs | ||
- See [Log Database setup](#optional-setup-log-forwarding-database) below | ||
- Optionally, [Hugging Face](https://huggingface.co/) credentials for running gated models (e.g. Llama2) or private ones (i.e. your own org's finetuned models) | ||
|
||
### Run with Docker | ||
|
||
1. Install [Docker](https://docs.docker.com/get-docker/) | ||
2. Install [Tilt](https://docs.tilt.dev/) | ||
3. Run `tilt up` | ||
4. (optionally) Press space to open the tilt manager | ||
5. To shut down all containers, run `tilt down` | ||
|
||
Services will be started at: | ||
|
||
- Tilt manager: [localhost:10350](http://localhost:10350/) | ||
- Frontend: [localhost:3000](http://localhost:3000/) | ||
- Backend: [localhost:3000/api](http://localhost:3000/api) | ||
- MailHog UI: [localhost:8025](http://localhost:8025/) | ||
- Postgres: `localhost:5432` (user: `postgres`, pw: `postgres`) | ||
|
||
If this is your first time logging in, a new user will be created with email `admin@ritual.com` with ADMIN privileges. | ||
|
||
### Run without Docker | ||
|
||
The docker container makes a new NextJS production build on each save (although optimized for dependency diffing, not as instantaneous as true HMR). | ||
|
||
To make use of the NextJS hot-reload and bring your iteration cycles down from ~5s/change -> ~50ms/change, it is better to develop locally. | ||
|
||
1. Either install Postgres (a great utility for macOS users is [postgres.app](https://postgresapp.com/)) or selectively run the Postgres container. | ||
a. Run `npx prisma generate` to generate the db schema using [Prisma](https://www.prisma.io) | ||
2. Either slot in SendGrid credentials or selectively run the Mailhog SMTP container. | ||
3. If you don't have `pnpm`, install it via `npm i -g pnpm`. | ||
4. Run `pnpm install` and `pnpm run dev` | ||
|
||
### Setup Postgres | ||
|
||
Use the following commands to set up your Postgres database, whether running with or without Docker. | ||
|
||
Using [Prisma](https://www.prisma.io): | ||
1. Run `npx prisma migrate dev --name init` to generate the initial migration file. | ||
2. Run `npx prisma migrate deploy` to deploy the migration, creating the necessary tables etc. | ||
3. Run `npx prisma db seed` to create an initial admin user for testing. | ||
|
||
### [Optional] Setup log forwarding database | ||
|
||
You can optionally instrument your deployment of [text-generation-inference](https://github.com/huggingface/text-generation-inference) to add logs via [fluentbit](https://fluentbit.com). You can host a log database anywhere you like, and provide the connection parameters [here](.env.sample#L15-L19). After creating the database, execute the following query to create a `"fluentbit"` table: | ||
|
||
``` | ||
-- Table: public.fluentbit | ||
-- DROP TABLE IF EXISTS public.fluentbit; | ||
CREATE TABLE IF NOT EXISTS public.fluentbit | ||
( | ||
tag character varying COLLATE pg_catalog."default", | ||
"time" timestamp without time zone, | ||
data jsonb | ||
) | ||
TABLESPACE pg_default; | ||
``` | ||
|
||
## Customization | ||
|
||
You can easily extend the Prime user interface: | ||
|
||
- We show a **limited selection of HuggingFace models** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying a public (or private, assuming your [key](.env.sample#L26) has access to it) model via the UI, add it to the [whitelist](./types/ml/model.ts#L1). | ||
- We show a **limited selection of inference server parameters** that can be configured via the UI. Depending on the use case, you may want to add or remove some. To modify or extend the inference server parameters available in the UI, see `INFERENCE_OPTIONS` and `RUN_OPTIONS` in [tgi](./utils/tgi.ts). | ||
- We show a **limited selection of Paperspace machines** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying other machine types, add them to the [whitelist](./types/ml/paperspace.ts#L1-L12) by **GPU name**. | ||
- We show a **limited selection of Paperspace OS templates** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying other operating system [templates](https://docs.paperspace.com/core/api-reference/templates), add them to the [whitelist](./types/ml/paperspace.ts#L14) by **id**. | ||
|
||
## License | ||
|
||
[BSD 3-clause Clear](./LICENSE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Enforce a minimum Tilt version, so labels are supported | ||
# https://docs.tilt.dev/api.html#api.version_settings | ||
version_settings(constraint='>=0.22.1') | ||
|
||
# Setup with compose | ||
docker_compose("./docker-compose.yml") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"$schema": "https://ui.shadcn.com/schema.json", | ||
"style": "default", | ||
"rsc": false, | ||
"tailwind": { | ||
"config": "tailwind.config.js", | ||
"css": "styles/globals.css", | ||
"baseColor": "zinc", | ||
"cssVariables": true | ||
}, | ||
"aliases": { | ||
"components": "components/generated", | ||
"utils": "utils" | ||
} | ||
} |
Oops, something went wrong.