Skip to content

Commit

Permalink
feat: publishing ritual-net/prime v0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
anish-ritual committed Nov 30, 2023
0 parents commit 36fc288
Show file tree
Hide file tree
Showing 119 changed files with 15,722 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .Dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# vercel
.vercel

# VSCode workspace profile
.vscode/
27 changes: 27 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
NEXTAUTH_URL="http://localhost:3000"
NEXTAUTH_SECRET="aT0cELwZnDwbY/6TO6sR4i90aEAFuJMbVw4/stfAtWI="

# Database for Prime
DATABASE_URL="postgresql://postgres:postgres@database/postgres"

# Email server for passwordless authentication
EMAIL_SERVER_USER="email-user"
EMAIL_SERVER_PASSWORD="email-pass"
EMAIL_SERVER_HOST="smtp"
EMAIL_SERVER_PORT="1025"
EMAIL_FROM="admin@ritual.com"

# Database for forwarding logs of TGI servers
DB_HOST="xxxxxxxx.yyyyyyyy.us-east-2.rds.amazonaws.com"
DB_PORT=5432
DB_USER="db-user"
DB_PASS="db-password"
DB_NAME="db-name"

# For pulling TGI from DockerHub
DOCKERHUB_USER=ritual
DOCKERHUB_TGI_IMAGE_TAG=tgi:1.1.0

# (optional) For HuggingFace private model deployment
HF_API_KEY="HuggingFace-API-Key"
HF_ORG_NAME="Org-name"
3 changes: 3 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"extends": ["next/core-web-vitals", "prettier"]
}
45 changes: 45 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

# Env variables
.env
.env.local

# VSCode workspace profile
.vscode/

# Prisma
migrations/
4 changes: 4 additions & 0 deletions .husky/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env sh
# Git pre-commit hook: source the husky helper script that lives in the "_"
# directory next to this file, then run lint-staged.
. "$(dirname -- "$0")/_/husky.sh"

# Invoke lint-staged through npx.
npx lint-staged
8 changes: 8 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Defaults
**/.git
**/.svn
**/.hg
**/node_modules

# NextJS
.next
4 changes: 4 additions & 0 deletions .prettierrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"tabWidth": 2,
"useTabs": false
}
46 changes: 46 additions & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Multi-stage build with four stages: base, deps, builder and runner.
FROM node:18-alpine AS base

# Setup pnpm
RUN apk add --no-cache libc6-compat
RUN corepack enable && corepack prepare pnpm@8.7.5 --activate

# Install dependencies only when needed
FROM base AS deps

# Setup directory
WORKDIR /app

# Install dependencies
# Only the manifest and lockfile are copied here, so this layer is rebuilt
# only when they change.
COPY package.json pnpm-lock.yaml* ./
RUN pnpm i --frozen-lockfile

# Rebuild source code as needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .
# Overwrites any .env brought in by the previous COPY with the sample values,
# so the build below always runs against .env.sample.
COPY .env.sample .env
RUN npx prisma generate
RUN pnpm run build

# Runner
FROM base AS runner
WORKDIR /app
# key=value form; the whitespace-separated `ENV key value` form is legacy.
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1

# Dedicated system group/user so the server does not run as root
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

COPY --from=builder /app/public ./public

# Automatically leverage output traces to reduce image size
# https://nextjs.org/docs/advanced-features/output-file-tracing
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static

# Run
USER nextjs
EXPOSE 3000
ENV PORT=3000
CMD ["node", "server.js"]
32 changes: 32 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
The Clear BSD License

Copyright (c) 2023 Origin Research Ltd
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted (subject to the limitations in the disclaimer below) provided that
the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.

NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
147 changes: 147 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
<p align="center">
<h1 align="center">Prime</h1>
</p>
<p align="center">
<b><a href="https://github.com/ritual-net/prime#about">About</a></b>
|
<b><a href="https://github.com/ritual-net/prime#architecture">Architecture</a></b>
|
<b><a href="https://github.com/ritual-net/prime#usage">Usage</a></b>
|
<b><a href="https://github.com/ritual-net/prime#customization">Customization</a></b>
</p>

## About

Prime is a one-click toolkit for provisioning servers to deploy and serve Large Language Models (LLMs).

- Cloud provider agnostic for flexible deployments
- Fine-grained access controls with ACL permission system
- Out-of-the-box inference playground support with client-side PII redaction
- Inference engine powered by [text-generation-inference](https://github.com/huggingface/text-generation-inference)

## Architecture

Generally, the idea behind Prime is simple—to act as a managed interface that connects to other inference providing solutions (a la Paperspace, CoreWeave, Fluidstack, etc.).

### Technology

- TypeScript
- Frontend:
- [NextJS](https://nextjs.org/) as React framework
- [TailwindCSS](https://tailwindcss.com/) as CSS framework
- [shadcn/ui](https://ui.shadcn.com/) as component library
- Backend:
- [NextJS serverless functions](https://vercel.com/docs/concepts/functions/serverless-functions)
- [Prisma](https://prisma.io) as database ORM connecting to Postgres

### System

1. Users authenticate via [passwordless email magic links](./pages/api/auth/[...nextauth].ts)
2. Users are approved by admin users
3. There is a hierarchy of [permission ACLs](./utils/auth.ts)
4. Users can create inference servers across any ML inference provider that is implemented ([extending BaseProvider](./ml/base.ts))
a. Currently, only [Paperspace](#paperspace) is supported.

### Supported cloud providers

#### Paperspace

[Paperspace](https://www.paperspace.com/) is the only cloud GPU provider currently supported out-of-the-box.

##### Account

To use Paperspace, you will need an account (email, password) and API key. Note that:

1. **Credentials for each ML provider are shared across all users of a Prime server.** In other words, each user cannot use their individual account to deploy machines; only one account is used. **Therefore, we highly recommend that you only add users you trust.**
2. Paperspace restricts the amount and type of machines a new account is allowed to deploy. Therefore, you will have to [request access and limit increases](https://docs.paperspace.com/core/quota-limits/) for the types of machines you want to deploy, depending on your use case.

## Usage

### Environment setup

See [.env.sample](.env.sample) for setting up your environment variables correctly. Put your variables in a file named `.env`.

```bash
cp .env.sample .env
vim .env
```

**For running Prime, you will need:** (skip if using Docker)

- A Postgres database for Prime (or use the test one, see [Run without Docker](#run-without-docker))
- SendGrid credentials (or use the test ones, see [Run without Docker](#run-without-docker))

**For deploying TGI to Paperspace, you will need:**

- A Postgres database for TGI logs
- See [Log Database setup](#optional-setup-log-forwarding-database) below
- Optionally, [Hugging Face](https://huggingface.co/) credentials for running gated models (e.g. Llama2) or private ones (i.e. your own org's finetuned models)

### Run with Docker

1. Install [Docker](https://docs.docker.com/get-docker/)
2. Install [Tilt](https://docs.tilt.dev/)
3. Run `tilt up`
4. (optionally) Press space to open the tilt manager
5. To shut down all containers, run `tilt down`

Services will be started at:

- Tilt manager: [localhost:10350](http://localhost:10350/)
- Frontend: [localhost:3000](http://localhost:3000/)
- Backend: [localhost:3000/api](http://localhost:3000/api)
- MailHog UI: [localhost:8025](http://localhost:8025/)
- Postgres: `localhost:5432` (user: `postgres`, pw: `postgres`)

If this is your first time logging in, a new user will be created with email `admin@ritual.com` with ADMIN privileges.

### Run without Docker

The docker container makes a new NextJS production build on each save (although optimized for dependency diffing, not as instantaneous as true HMR).

To make use of the NextJS hot-reload and bring your iteration cycles down from ~5s/change -> ~50ms/change, it is better to develop locally.

1. Either install Postgres (a great utility for macOS users is [postgres.app](https://postgresapp.com/)) or selectively run the Postgres container.
a. Run `npx prisma generate` to generate the [Prisma](https://www.prisma.io) Client from the Prisma schema
2. Either slot in SendGrid credentials or selectively run the Mailhog SMTP container.
3. If you don't have `pnpm`, install it via `npm i -g pnpm`.
4. Run `pnpm install` and `pnpm run dev`

### Setup Postgres

Use the following commands to setup your Postgres database, whether running with or without Docker.

Using [Prisma](https://www.prisma.io):
a. Run `npx prisma migrate dev --name init` to generate the initial migration file.
b. Run `npx prisma migrate deploy` to deploy the migration, creating the necessary tables etc.
c. Run `npx prisma db seed` to create the initial admin user for testing.

### [Optional] Setup log forwarding database

You can optionally instrument your deployment of [text-generation-inference](https://github.com/huggingface/text-generation-inference) to add logs via [fluentbit](https://fluentbit.com). You can host a log database anywhere you like, and provide the connection parameters [here](.env.sample#L15-L19). After creating the database, execute the following query to create a `"fluentbit"` table:

```
-- Table: public.fluentbit
-- DROP TABLE IF EXISTS public.fluentbit;
CREATE TABLE IF NOT EXISTS public.fluentbit
(
tag character varying COLLATE pg_catalog."default",
"time" timestamp without time zone,
data jsonb
)
TABLESPACE pg_default;
```

## Customization

You can easily extend the Prime user interface:

- We show a **limited selection of HuggingFace models** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying a public (or private, assuming your [key](.env.sample#L26) has access to it) model via the UI, add it to the [whitelist](./types/ml/model.ts#L1).
- We show a **limited selection of inference server parameters** that can be configured via the UI. Depending on the use case, you may want to add or remove some. To modify or extend the inference server parameters available in the UI, see `INFERENCE_OPTIONS` and `RUN_OPTIONS` in [tgi](./utils/tgi.ts).
- We show a **limited selection of Paperspace machines** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying other machine types, add them to the [whitelist](./types/ml/paperspace.ts#L1-L12) by **GPU name**.
- We show a **limited selection of Paperspace OS templates** that can be deployed via the UI. Depending on the use case, you may want to add or remove some. To enable deploying other Operating Systems [templates](https://docs.paperspace.com/core/api-reference/templates), add them to the [whitelist](./types/ml/paperspace.ts#L14) by **id**.

## License

[BSD 3-clause Clear](./LICENSE)
6 changes: 6 additions & 0 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Require Tilt 0.22.1 or newer so that labels are supported.
# Reference: https://docs.tilt.dev/api.html#api.version_settings
version_settings(constraint = ">=0.22.1")

# Load the services from the Docker Compose file.
docker_compose('./docker-compose.yml')
15 changes: 15 additions & 0 deletions components.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"$schema": "https://ui.shadcn.com/schema.json",
"style": "default",
"rsc": false,
"tailwind": {
"config": "tailwind.config.js",
"css": "styles/globals.css",
"baseColor": "zinc",
"cssVariables": true
},
"aliases": {
"components": "components/generated",
"utils": "utils"
}
}
Loading

0 comments on commit 36fc288

Please sign in to comment.