diff --git a/.github/workflows/code.deploy.demo.yml b/.github/workflows/code.deploy.demo.yml new file mode 100644 index 00000000..cefda522 --- /dev/null +++ b/.github/workflows/code.deploy.demo.yml @@ -0,0 +1,68 @@ +name: Deploy Demo Environment +on: + push: + branches: [ "main"] + +permissions: + id-token: write + contents: read + +jobs: + CheckPendingWorkflow: + runs-on: ubuntu-latest + steps: + - uses: ahmadnassri/action-workflow-queue@v1 + with: + delay: 300000 + timeout: 7200000 + DeployLISA: + needs: CheckPendingWorkflow + environment: demo + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ vars.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT }}:role/${{ vars.ROLE_NAME_TO_ASSUME }} + role-session-name: GitHub_to_AWS_via_FederatedOIDC + role-duration-seconds: 14400 + - name: Create config-custom.yaml + id: create-yaml + # Hand the variable to the shell through env instead of expanding it inside the script text, so quotes, dollar signs and backticks in the YAML are written verbatim rather than interpreted by bash. + env: + CONFIG_YAML: ${{ vars.CONFIG_YAML }} + run: | + printf '%s\n' "$CONFIG_YAML" > config-custom.yaml + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Use Node.js 20.x + uses: actions/setup-node@v4 + with: + node-version: 20.x + - name: Install CDK dependencies + run: | + npm install + - name: Deploy LISA + run: | + make deploy HEADLESS=true + SendSlackNotification: + name: Send Slack Notification + needs: [ DeployLISA ] + runs-on: ubuntu-latest + if: always() + steps: + - name: Send Notification that Demo Deploy Finished + uses: rtCamp/action-slack-notify@v2 + env: + SLACK_WEBHOOK: ${{ secrets.INTERNAL_DEV_SLACK_WEBHOOK_URL }} + SLACK_COLOR: ${{ contains(join(needs.*.result, ' '), 'failure') && 'failure' || 'success' }} + SLACK_TITLE: 'Demo Deploy Finished' + SLACK_FOOTER: '' + MSG_MINIMAL: 'actions url,commit' + SLACK_MESSAGE_ON_FAILURE: ' Demo Deploy FAILED on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url 
}}|commit>' + SLACK_MESSAGE_ON_SUCCESS: 'Demo Deploy SUCCESS on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url }}|commit>.' + SLACK_MESSAGE: 'Demo Deploy Finished with status ${{ job.status }} on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url }}|commit>' diff --git a/.github/workflows/code.deploy.dev.yml b/.github/workflows/code.deploy.dev.yml new file mode 100644 index 00000000..514e8910 --- /dev/null +++ b/.github/workflows/code.deploy.dev.yml @@ -0,0 +1,68 @@ +name: Deploy Dev Environment +on: + push: + branches: [ "main", "develop", "release/**" ] + +permissions: + id-token: write + contents: read + +jobs: + CheckPendingWorkflow: + runs-on: ubuntu-latest + steps: + - uses: ahmadnassri/action-workflow-queue@v1 + with: + delay: 300000 + timeout: 7200000 + DeployLISA: + needs: CheckPendingWorkflow + environment: dev + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: ${{ vars.AWS_REGION }} + role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT }}:role/${{ vars.ROLE_NAME_TO_ASSUME }} + role-session-name: GitHub_to_AWS_via_FederatedOIDC + role-duration-seconds: 14400 + - name: Create config-custom.yaml + id: create-yaml + # Hand the variable to the shell through env instead of expanding it inside the script text, so quotes, dollar signs and backticks in the YAML are written verbatim rather than interpreted by bash. + env: + CONFIG_YAML: ${{ vars.CONFIG_YAML }} + run: | + printf '%s\n' "$CONFIG_YAML" > config-custom.yaml + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Use Node.js 20.x + uses: actions/setup-node@v4 + with: + node-version: 20.x + - name: Install CDK dependencies + run: | + npm install + - name: Deploy LISA + run: | + make deploy HEADLESS=true + SendSlackNotification: + name: Send Slack Notification + needs: [ DeployLISA ] + runs-on: ubuntu-latest + if: always() + steps: + - name: Send Notification that Dev Deploy Finished + uses: rtCamp/action-slack-notify@v2 + env: + 
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + SLACK_COLOR: ${{ contains(join(needs.*.result, ' '), 'failure') && 'failure' || 'success' }} + SLACK_TITLE: 'Dev Deploy Finished' + SLACK_FOOTER: '' + MSG_MINIMAL: 'actions url,commit' + SLACK_MESSAGE_ON_FAILURE: ' Dev Deploy FAILED on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url }}|commit>' + SLACK_MESSAGE_ON_SUCCESS: 'Dev Deploy SUCCESS on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url }}|commit>.' + SLACK_MESSAGE: 'Dev Deploy Finished with status ${{ job.status }} on branch ${{ github.head_ref || github.ref_name }} for <${{ github.event.pull_request.html_url || github.event.head_commit.url }}|commit>' diff --git a/.github/workflows/code.hotfix.branch.yml b/.github/workflows/code.hotfix.branch.yml index 34df3206..6f8bede9 100644 --- a/.github/workflows/code.hotfix.branch.yml +++ b/.github/workflows/code.hotfix.branch.yml @@ -24,7 +24,7 @@ jobs: ref: refs/tags/${{ github.event.inputs.source_tag }} - name: Create Hotfix Branch and Update Version run: | - git config --global user.email "petermul@amazon.com" + git config --global user.email "evmann@amazon.com" git config --global user.name "github_actions_lisa" SRC_TAG=${{ github.event.inputs.source_tag }} DST_TAG=${{ github.event.inputs.dest_tag }} diff --git a/.github/workflows/code.merge.main-to-develop.yml b/.github/workflows/code.merge.main-to-develop.yml index d5a47b27..0a18b259 100644 --- a/.github/workflows/code.merge.main-to-develop.yml +++ b/.github/workflows/code.merge.main-to-develop.yml @@ -18,7 +18,7 @@ jobs: ssh-key: ${{ secrets.DEPLOYMENT_SSH_KEY }} - name: merge main into develop run: | - git config --global user.email "petermul@amazon.com" + git config --global user.email "evmann@amazon.com" git config --global user.name "github_actions_lisa" git fetch --unshallow git checkout develop diff --git 
a/.github/workflows/code.release.branch.yml b/.github/workflows/code.release.branch.yml index 4b8e07b1..cbe3900c 100644 --- a/.github/workflows/code.release.branch.yml +++ b/.github/workflows/code.release.branch.yml @@ -21,7 +21,7 @@ jobs: ref: develop - name: Create Release Branch and Update Version run: | - git config --global user.email "petermul@amazon.com" + git config --global user.email "evmann@amazon.com" git config --global user.name "github_actions_lisa" RELEASE_TAG=${{ github.event.inputs.release_tag }} git checkout -b release/${{ github.event.inputs.release_tag }} diff --git a/.github/workflows/docs.deploy.github-pages.yml b/.github/workflows/docs.deploy.github-pages.yml new file mode 100644 index 00000000..ac442a3d --- /dev/null +++ b/.github/workflows/docs.deploy.github-pages.yml @@ -0,0 +1,60 @@ + +name: Deploy VitePress site to Github Pages +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + - name: Setup Pages + uses: actions/configure-pages@v4 + - name: Install root dependencies + run: | + npm install + - name: Install dependencies + working-directory: ./lib/docs + run: npm install + env: + CI: "" + - name: Build with VitePress + working-directory: ./lib/docs + run: npm run build + env: + CI: "" + DOCS_BASE_PATH: '/LISA/' + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: ./lib/docs/dist + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + needs: build + runs-on: ubuntu-latest + name: Deploy + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.github/workflows/test-and-lint.yml 
b/.github/workflows/test-and-lint.yml index 6174f1df..f8f320ee 100644 --- a/.github/workflows/test-and-lint.yml +++ b/.github/workflows/test-and-lint.yml @@ -4,7 +4,7 @@ on: push: branches: ['main', 'develop', 'release/**', 'hotfix/**'] pull_request: - branches: ['main', 'develop', 'release/**', 'hotfix/**'] + branches: ['main', 'develop', 'release/**', 'hotfix/**', 'feature/**'] permissions: contents: read diff --git a/.gitignore b/.gitignore index 5ffdff8e..7ad88b97 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ coverage /models # Deployment configuration file config.yaml +config-custom.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index ab04ea73..199dc844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,62 @@ +# v3.2.0 +## Key Features +### Enhanced Deployment Configuration +- LISA v3.2.0 introduces a significant update to the configuration file schema, optimizing the deployment process +- The previous single config.yaml file has been replaced with a more flexible two-file system: config-base.yaml and config-custom.yaml +- config-base.yaml now contains default properties, which can be selectively overridden using config-custom.yaml, allowing for greater customization while maintaining a standardized base configuration +- The number of required properties in the config-custom.yaml file has been reduced to 8 items, simplifying the configuration process +- This update enhances the overall flexibility and maintainability of LISA configurations, providing a more robust foundation for future developments and easier customization for end-users + +#### Important Note +- The previous config.yaml file format is no longer compatible with this update +- To facilitate migration, we have developed a utility. 
Users can execute `npm run migrate-properties` to automatically convert their existing config.yaml file to the new config-custom.yaml format + +### Admin UI Configuration Page +- Administrative Control of Chat Components: + - Administrators now have granular control over the activation and deactivation of chat components for all users through the Configuration Page + - This feature allows for dynamic management of user interface elements, enhancing system flexibility and user experience customization + - Items that can be configured include: + - The option to delete session history + - Visibility of message metadata + - Configuration of chat Kwargs + - Customization of prompt templates + - Adjust chat history buffer settings + - Modify the number of RAG documents to be included in the retrieval process (TopK) + - Ability to upload RAG documents + - Ability to upload in-context documents +- System Banner Management: + - The Configuration Page now includes functionality for administrators to manage the system banner + - Administrators can activate, deactivate, and update the content of the system banner + +### LISA Documentation Site +- We are pleased to announce the launch of the official [LISA Documentation site](https://awslabs.github.io/LISA/) +- This comprehensive resource provides customers with additional guides and extensive information on LISA +- The documentation is also optionally deployable within your environment during LISA deployment +- The team is continuously working to add and expand content available on this site + +## Enhancements +- Implemented a selection-based interface for instance input, replacing free text entry +- Improved CDK Nag integration across stacks +- Added functionality for administrators to specify block volume size for models, enabling successful deployment of larger models +- Introduced options for administrators to choose between Private or Regional API Gateway endpoints +- Enabled subnet specification within the designated VPC 
for deployed resources +- Implemented support for headless deployment execution + +## Bug Fixes +- Resolved issues with Create and Update model alerts to ensure proper display in the modal +- Enhanced error handling for model creation/update processes to cover all potential scenarios + +## Coming Soon +- Version 3.3.0 will include a new RAG ingestion pipeline. This will allow users to configure an S3 bucket and an ingestion trigger. When triggered, these documents will be pre-processed and loaded into the selected vector store. + +## Acknowledgements +* @bedanley +* @estohlmann +* @dustins + +**Full Changelog**: https://github.com/awslabs/LISA/compare/v3.1.0...v3.2.0 + + # v3.1.0 ## Enhancements ### Model Management Administration diff --git a/Makefile b/Makefile index 201aa7bd..92ec4518 100644 --- a/Makefile +++ b/Makefile @@ -10,53 +10,50 @@ ################################################################################# PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +HEADLESS = false +DOCKER_CMD ?= $(or $(CDK_DOCKER),docker) # Arguments defined through command line or config.yaml -# ENV -ifeq (${ENV},) -ENV := $(shell cat $(PROJECT_DIR)/config.yaml | yq '.env') -endif - -ifeq (${ENV},) -$(error env must be set in command line using ENV variable or config.yaml) -endif - # PROFILE (optional argument) ifeq (${PROFILE},) -TEMP_PROFILE := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).profile) +TEMP_PROFILE := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .profile) ifneq ($(TEMP_PROFILE), null) PROFILE := ${TEMP_PROFILE} else -$(warning profile is not set in the command line using PROFILE variable or config.yaml, attempting deployment without this variable) +$(warning profile is not set in the command line using PROFILE variable or config files, attempting deployment without this variable) endif endif # DEPLOYMENT_NAME ifeq (${DEPLOYMENT_NAME},) -DEPLOYMENT_NAME := $(shell cat $(PROJECT_DIR)/config.yaml | yq 
.$(ENV).deploymentName) +DEPLOYMENT_NAME := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .deploymentName) endif -ifeq (${DEPLOYMENT_NAME},) -$(error deploymentName must be set in command line using DEPLOYMENT_NAME variable or config.yaml) +ifeq (${DEPLOYMENT_NAME}, null) +DEPLOYMENT_NAME := $(shell cat $(PROJECT_DIR)/config-base.yaml | yq .deploymentName) +endif + +ifeq (${DEPLOYMENT_NAME}, null) +DEPLOYMENT_NAME := prod endif -# ACCOUNT_NUMBER +# ACCOUNT_NUMBER (required: yq prints "null" for a missing key, so null is rejected as well as empty) ifeq (${ACCOUNT_NUMBER},) -ACCOUNT_NUMBER := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).accountNumber) +ACCOUNT_NUMBER := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .accountNumber) endif -ifeq (${ACCOUNT_NUMBER},) -$(error accountNumber must be set in command line using ACCOUNT_NUMBER variable or config.yaml) +ifeq ($(filter-out null,${ACCOUNT_NUMBER}),) +$(error accountNumber must be set in command line using ACCOUNT_NUMBER variable or config files) endif -# REGION +# REGION (required: yq prints "null" for a missing key, so null is rejected as well as empty) ifeq (${REGION},) -REGION := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).region) +REGION := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .region) endif -ifeq (${REGION},) -$(error region must be set in command line using REGION variable or config.yaml) +ifeq ($(filter-out null,${REGION}),) +$(error region must be set in command line using REGION variable or config files) endif # URL_SUFFIX - used for the docker login @@ -66,22 +63,30 @@ else URL_SUFFIX := c2s.ic.gov endif -# Arguments defined through config.yaml +# Arguments defined through config files # APP_NAME -APP_NAME := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).appName) -ifeq (${APP_NAME},) -$(error appName must be set in config.yaml) +APP_NAME := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .appName) +ifeq (${APP_NAME}, null) +APP_NAME := $(shell cat $(PROJECT_DIR)/config-base.yaml | yq .appName) +endif + +ifeq (${APP_NAME}, null) +APP_NAME := lisa endif # DEPLOYMENT_STAGE -DEPLOYMENT_STAGE := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).deploymentStage) -ifeq (${DEPLOYMENT_STAGE},) -$(error deploymentStage must be set in 
config.yaml) +DEPLOYMENT_STAGE := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .deploymentStage) +ifeq (${DEPLOYMENT_STAGE}, null) +DEPLOYMENT_STAGE := $(shell cat $(PROJECT_DIR)/config-base.yaml | yq .deploymentStage) +endif + +ifeq (${DEPLOYMENT_STAGE}, null) +DEPLOYMENT_STAGE := prod endif # ACCOUNT_NUMBERS_ECR - AWS account numbers that need to be logged into with Docker CLI to use ECR -ACCOUNT_NUMBERS_ECR := $(shell cat $(PROJECT_DIR)/config.yaml | yq .$(ENV).accountNumbersEcr[]) +ACCOUNT_NUMBERS_ECR := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq .accountNumbersEcr[]) # Append deployed account number to array for dockerLogin rule ACCOUNT_NUMBERS_ECR := $(ACCOUNT_NUMBERS_ECR) $(ACCOUNT_NUMBER) @@ -96,10 +101,10 @@ ifneq ($(findstring $(DEPLOYMENT_STAGE),$(STACK)),$(DEPLOYMENT_STAGE)) endif # MODEL_IDS - IDs of models to deploy -MODEL_IDS := $(shell cat $(PROJECT_DIR)/config.yaml | yq '.$(ENV).ecsModels[].modelName') +MODEL_IDS := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.ecsModels[].modelName') # MODEL_BUCKET - S3 bucket containing model artifacts -MODEL_BUCKET := $(shell cat $(PROJECT_DIR)/config.yaml | yq '.$(ENV).s3BucketModels') +MODEL_BUCKET := $(shell cat $(PROJECT_DIR)/config-custom.yaml | yq '.s3BucketModels') ################################################################################# @@ -146,14 +151,13 @@ installTypeScriptRequirements: ## Make sure Docker is running dockerCheck: - @cmd_output=$$(docker ps); \ - if \ - [ $$? != 0 ]; \ - then \ - echo $$cmd_output; \ - exit 1; \ + @cmd_output=$$($(DOCKER_CMD) ps); \ + if [ $$? != 0 ]; then \ + echo "Process $(DOCKER_CMD) is not running. Exiting..."; \ + exit 1; \ fi; \ + ## Check if models are uploaded modelCheck: @$(foreach MODEL_ID,$(MODEL_IDS), \ @@ -180,7 +184,7 @@ modelCheck: echo "What is your huggingface access token? 
"; \ read -s access_token; \ echo "Converting and uploading safetensors for model: $(MODEL_ID)"; \ - tgiImage=$$(yq -r '[.$(ENV).ecsModels[] | select(.inferenceContainer == "tgi") | .baseImage] | first' $(PROJECT_DIR)/config.yaml); \ + tgiImage=$$(yq -r '[.ecsModels[] | select(.inferenceContainer == "tgi") | .baseImage] | first' $(PROJECT_DIR)/config-custom.yaml); \ echo $$tgiImage; \ $(PROJECT_DIR)/scripts/convert-and-upload-model.sh -m $(MODEL_ID) -s $(MODEL_BUCKET) -a $$access_token -t $$tgiImage -d $$localModelDir; \ fi; \ @@ -225,104 +229,58 @@ cleanMisc: dockerLogin: dockerCheck ifdef PROFILE @$(foreach ACCOUNT,$(ACCOUNT_NUMBERS_ECR), \ - aws ecr get-login-password --region ${REGION} --profile ${PROFILE} | docker login --username AWS --password-stdin $(ACCOUNT).dkr.ecr.${REGION}.${URL_SUFFIX} >/dev/null 2>&1; \ + aws ecr get-login-password --region ${REGION} --profile ${PROFILE} | $(DOCKER_CMD) login --username AWS --password-stdin ${ACCOUNT}.dkr.ecr.${REGION}.${URL_SUFFIX} >/dev/null 2>&1; \ ) else @$(foreach ACCOUNT,$(ACCOUNT_NUMBERS_ECR), \ - aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin $(ACCOUNT).dkr.ecr.${REGION}.${URL_SUFFIX} >/dev/null 2>&1; \ + aws ecr get-login-password --region ${REGION} | $(DOCKER_CMD) login --username AWS --password-stdin ${ACCOUNT}.dkr.ecr.${REGION}.${URL_SUFFIX} >/dev/null 2>&1; \ ) endif + listStacks: @npx cdk list buildEcsDeployer: @cd ./ecs_model_deployer && npm install && npm run build +define print_config + @printf "\n \ + DEPLOYING $(STACK) STACK APP INFRASTRUCTURE \n \ + -----------------------------------\n \ + Account Number $(ACCOUNT_NUMBER)\n \ + Region $(REGION)\n \ + App Name $(APP_NAME)\n \ + Deployment Stage $(DEPLOYMENT_STAGE)\n \ + Deployment Name $(DEPLOYMENT_NAME)" + @if [ -n "$(PROFILE)" ]; then \ + printf "\n Deployment Profile $(PROFILE)"; \ + fi + @printf "\n-----------------------------------\n" +endef + ## Deploy all infrastructure deploy: dockerCheck 
dockerLogin cleanMisc modelCheck buildEcsDeployer -ifdef PROFILE - @printf "\n \ - DEPLOYING $(STACK) STACK APP INFRASTRUCTURE \n \ - -----------------------------------\n \ - Deployment Profile ${PROFILE}\n \ - Account Number ${ACCOUNT_NUMBER}\n \ - Region ${REGION}\n \ - App Name ${APP_NAME}\n \ - Deployment Stage ${DEPLOYMENT_STAGE}\n \ - Deployment Name ${DEPLOYMENT_NAME}\n \ - -----------------------------------\n \ - Is the configuration correct? [y/N] "\ - && read confirm_config &&\ - if \ - [ $${confirm_config:-'N'} = 'y' ]; \ - then \ - npx cdk deploy ${STACK} \ - --profile ${PROFILE} \ - -c ${ENV}='$(shell echo '${${ENV}}')'; \ - fi; + $(call print_config) +ifneq (,$(findstring true, $(HEADLESS))) + npx cdk deploy ${STACK} $(if $(PROFILE),--profile ${PROFILE}) --require-approval never -c ${ENV}='$(shell echo '${${ENV}}')'; else - @printf "\n \ - DEPLOYING $(STACK) STACK APP INFRASTRUCTURE \n \ - -----------------------------------\n \ - Account Number ${ACCOUNT_NUMBER}\n \ - Region ${REGION}\n \ - App Name ${APP_NAME}\n \ - Deployment Stage ${DEPLOYMENT_STAGE}\n \ - Deployment Name ${DEPLOYMENT_NAME}\n \ - -----------------------------------\n \ - Is the configuration correct? [y/N] "\ + @printf "Is the configuration correct? 
[y/N] "\ && read confirm_config &&\ - if \ - [ $${confirm_config:-'N'} = 'y' ]; \ - then \ - npx cdk deploy ${STACK} \ - -c ${ENV}='$(shell echo '${${ENV}}')'; \ + if [ $${confirm_config:-'N'} = 'y' ]; then \ + npx cdk deploy ${STACK} $(if $(PROFILE),--profile ${PROFILE}) -c ${ENV}='$(shell echo '${${ENV}}')'; \ fi; endif ## Tear down all infrastructure destroy: cleanMisc -ifdef PROFILE - @printf "\n \ - DESTROYING $(STACK) STACK APP INFRASTRUCTURE \n \ - -----------------------------------\n \ - Deployment Profile ${PROFILE}\n \ - Account Number ${ACCOUNT_NUMBER}\n \ - Region ${REGION}\n \ - App Name ${APP_NAME}\n \ - Deployment Stage ${DEPLOYMENT_STAGE}\n \ - Deployment Name ${DEPLOYMENT_NAME}\n \ - -----------------------------------\n \ - Is the configuration correct? [y/N] "\ + $(call print_config) + @printf "Is the configuration correct? [y/N] "\ && read confirm_config &&\ - if \ - [ $${confirm_config:-'N'} = 'y' ]; \ - then \ - npx cdk destroy ${STACK} \ - --force \ - --profile ${PROFILE}; \ + if [ $${confirm_config:-'N'} = 'y' ]; then \ + npx cdk destroy ${STACK} --force $(if $(PROFILE),--profile ${PROFILE}); \ fi; -else - @printf "\n \ - DESTROYING $(STACK) STACK APP INFRASTRUCTURE \n \ - -----------------------------------\n \ - Account Number ${ACCOUNT_NUMBER}\n \ - Region ${REGION}\n \ - App Name ${APP_NAME}\n \ - Deployment Stage ${DEPLOYMENT_STAGE}\n \ - Deployment Name ${DEPLOYMENT_NAME}\n \ - -----------------------------------\n \ - Is the configuration correct? 
[y/N] "\ - && read confirm_config &&\ - if \ - [ $${confirm_config:-'N'} = 'y' ]; \ - then \ - npx cdk destroy ${STACK} \ - --force; \ - fi; -endif ################################################################################# diff --git a/README.md b/README.md index e8dec549..56f1b5ab 100644 --- a/README.md +++ b/README.md @@ -1,1307 +1,83 @@ # LLM Inference Solution for Amazon Dedicated Cloud (LISA) -![LISA Architecture](./assets/LisaArchitecture.png) -LISA is an infrastructure-as-code solution that supports model hosting and inference. Customers deploy LISA directly -into an AWS account and provision their own infrastructure. Customers bring their own models to LISA for hosting and -inference through Amazon ECS. LISA accelerates the use of Generative AI (GenAI) applications by providing scalable, -low latency access to customers’ generative LLMs and embedding language models. Customers can then focus on -experimenting with LLMs and developing GenAI applications. -LISA’s chatbot user interface can be used for experiment with features and for production use cases. LISA enhances model -output by integrating retrieval-augmented generation (RAG) with Amazon OpenSearch or PostgreSQL’s PGVector extension, -incorporating external knowledge sources into model responses. This helps reduce the need for fine-tuning and delivers -more contextually relevant outputs. - -LISA supports OpenAI’s API Spec via the LiteLLM proxy. This means that LISA is compatible for customers to configure -with models hosted externally by supported model providers. LiteLLM also allows customers to use LISA to standardize -model orchestration and communication across model providers instead of managing each individually. With OpenAI API spec -support, LISA can also be used as a stand-in replacement for any application that already utilizes OpenAI-centric -tooling (ex: OpenAI’s Python library, LangChain). 
- ---- -# Table of Contents - -- [LISA (LLM Inference Solution for Amazon Dedicated Cloud)](#lisa-llm-inference-solution-for-amazon-dedicated-cloud) -- [Breaking Changes in v2 to v3 Migration](#breaking-changes-in-v2-to-v3-migration) -- [Background](#background) -- [System Overview](#system-overview) -- [LISA Components](#lisa-components) - - [LISA Model Management](#lisa-model-management) - - [LISA Serve](#lisa-serve) - - [LISA Chat](#lisa-chat) -- [Interaction Flow](#interaction-flow) -- [Getting Started with LISA](#getting-started-with-lisa) - - [Prerequisites](#prerequisites) - - [Step 1: Clone the Repository](#step-1-clone-the-repository) - - [Step 2: Set Up Environment Variables](#step-2-set-up-environment-variables) - - [Step 3: Set Up Python and TypeScript Environments](#step-3-set-up-python-and-typescript-environments) - - [Step 4: Configure LISA](#step-4-configure-lisa) - - [Step 5: Stage Model Weights](#step-5-stage-model-weights) - - [Step 6: Configure Identity Provider](#step-6-configure-identity-provider) - - [Step 7: Configure LiteLLM](#step-7-configure-litellm) - - [Step 8: Set Up SSL Certificates (Development Only)](#step-8-set-up-ssl-certificates-development-only) - - [Step 9: Customize Model Deployment](#step-9-customize-model-deployment) - - [Step 10: Bootstrap CDK (If Not Already Done)](#step-10-bootstrap-cdk-if-not-already-done) -- [Recommended LiteLLM Configuration Options](#recommended-litellm-configuration-options) -- [API Usage Overview](#api-usage-overview) - - [User-facing OpenAI-Compatible API](#user-facing-openai-compatible-api) - - [Admin-level Model Management API](#admin-level-model-management-api) -- [Error Handling for API Requests](#error-handling-for-api-requests) -- [Deployment](#deployment) - - [Using Pre-built Resources](#using-pre-built-resources) - - [Deploying](#deploying) -- [Programmatic API Tokens](#programmatic-api-tokens) -- [Model Compatibility](#model-compatibility) -- [Chatbot Example](#chatbot-example) -- [Usage 
and Features](#usage-and-features) - - [OpenAI Specification Compatibility](#openai-specification-compatibility) - - [Continue JetBrains and VS Code Plugin](#continue-jetbrains-and-vs-code-plugin) - - [Usage in LLM Libraries](#usage-in-llm-libraries) -- [License Notice](#license-notice) - ---- -# Breaking Changes - -## v2 to v3 Migration - -With the release of LISA v3.0.0, we have introduced several architectural changes that are incompatible with previous versions. Although these changes may cause some friction for existing users, they aim to simplify the deployment experience and enhance long-term scalability. The following breaking changes are critical for existing users planning to upgrade: - -1. Model Deletion Upon Upgrade: Models deployed via EC2 and ECS using the config.yaml file’s ecsModels list will be deleted during the upgrade process. LISA has migrated to a new model deployment system that manages models internally, rendering the ecsModels list obsolete. We recommend backing up your model settings to facilitate their redeployment through the new Model Management API with minimal downtime. -1. Networking Changes and Full Teardown: Core networking changes require a complete teardown of the existing LISA installation using the make destroy command before upgrading. Cross-stack dependencies have been modified, necessitating this full teardown to ensure proper application of the v3 infrastructure changes. Additionally, users may need to manually delete some resources, such as ECR repositories or S3 buckets, if they were populated before CloudFormation began deleting the stack. This operation is destructive and irreversible, so it is crucial to back up any critical configurations and data (e.g., S3 RAG bucket contents, DynamoDB token tables) before proceeding with the upgrade. -1. New LiteLLM Admin Key Requirement: The new Model Management API requires an "admin" key for LiteLLM to track models for inference requests. 
This key, while transparent to users, must be present and conform to the required format (starting with sk-). The key is defined in the config.yaml file, and the LISA schema validator will prompt an error if it is missing or incorrectly formatted. - -## v3.0.0 to v3.1.0 - -In preparation of the v3.1.0 release, there are several changes that we needed to make in order to ensure the stability of the LISA system. -1. The CreateModel API `containerConfig` object has been changed so that the Docker Image repository is listed in `containerConfig.image.baseImage` instead of - its previous location at `containerConfig.baseImage.baseImage`. This change makes the configuration consistent with the config.yaml file in LISA v2.0 and prior. -2. The CreateModel API `containerConfig.image` object no longer requires the `path` option. We identified that this was a confusing and redundant option to set, considering - that the path was based on the LISA code repository structure, and that we already had an option to specify if a model was using TGI, TEI, or vLLM. Specifying the `inferenceContainer` - is sufficient for the system to infer which files to use so that the user does not have to provide this information. -3. The ApiDeployment stack now follows the same naming convention as the rest of the stacks that we deploy, utilization the deployment name and the deploymentStage names. This allows users - to have multiple LISA installations with different parameters in the same account without needing to change region or account entirely. After successful deployment, you may safely delete the - previous `${deploymentStage}-LisaApiDeployment` stack, as it is no longer in use. -4. If you have installed v3.0.0 or v3.0.1, you will need to **delete** the Models API stack so that the model deployer function will deploy again. The function was converted to a Docker Image - Function so that the growing Function size would fit within the Lambda constraints. 
We recommend that you take the following actions to avoid leaked resources: - 1. Use the Model Management UI to **delete all models** from LISA. This is needed so that we delete any CloudFormation stacks that track GPU instances. Failure to do this will require manual - resource cleanup to rid the account of inaccessible EC2 instances. Once the Models DynamoDB Table is deleted, we do not have a programmatic way to re-reference deployed models, so that is - why we recommend deleting them first. - 2. **Only after deleting all models through the Model Management UI**, manually delete the Model Management API stack in CloudFormation. This will take at least 45 minutes due to Lambda's use - of Elastic Network Interfaces for VPC access. The stack name will look like: `${deployment}-lisa-models-${deploymentStage}`. - 3. After the stack has been deleted, deploy LISA v3.1.0, which will recreate the Models API stack, along with the Docker Lambda Function. -5. The `ecsModels` section of `config.yaml` has been stripped down to only 3 fields per model: `modelName`, `inferenceContainer`, and `baseImage`. Just as before, the system will check to see if the models - defined here exist in your models S3 bucket prior to LISA deployment. These values will be needed later when invoking the Model Management API to create a model. ---- - -## Background - -LISA is a robust, AWS-native platform designed to simplify the deployment and management of Large Language Models (LLMs) in scalable, secure, and highly available environments. Drawing inspiration from the AWS open-source project [aws-genai-llm-chatbot](https://github.com/aws-samples/aws-genai-llm-chatbot), LISA builds on this foundation by offering more specialized functionality, particularly in the areas of security, modularity, and flexibility. 
- -One of the key differentiators of LISA is its ability to leverage the [text-generation-inference](https://github.com/huggingface/text-generation-inference/tree/main) text-generation-inference container from HuggingFace, allowing users to deploy cutting-edge LLMs. LISA also introduces several innovations that extend beyond its inspiration: - -1. **Support for Amazon Dedicated Cloud (ADC):** LISA is designed to operate in highly controlled environments like Amazon Dedicated Cloud (ADC) partitions, making it ideal for industries with stringent regulatory and security requirements. This focus on secure, isolated deployments differentiates LISA from other open-source platforms. -1. **Modular Design for Composability:** LISA's architecture is designed to be composable, splitting its components into distinct services. The core components, LISA Serve (for LLM serving and inference) and LISA Chat (for the chat interface), can be deployed as independent stacks. This modularity allows users to deploy only the parts they need, enhancing flexibility and scalability across different deployment environments. -1. **OpenAI API Specification Support:** LISA is built to support the OpenAI API specification, allowing users to replace OpenAI’s API with LISA without needing to change existing application code. This makes LISA a drop-in replacement for any workflow or application that already leverages OpenAI’s tooling, such as the OpenAI Python library or LangChain. - ---- - -## System Overview - -LISA is designed using a modular, microservices-based architecture, where each service performs a distinct function. It is composed of three core components: LISA Model Management, LISA Serve, and LISA Chat. Each of these components is responsible for specific functionality and interacts via well-defined API endpoints to ensure scalability, security, and fault tolerance across the system. 
- -**Key System Functionalities:** - -* **Authentication and Authorization** via AWS Cognito or OpenID Connect (OIDC) providers, ensuring secure access to both the REST API and Chat UI through token-based authentication and role-based access control. -* **Model Hosting** on AWS ECS with autoscaling and efficient traffic management using Application Load Balancers (ALBs), providing scalable and high-performance model inference. -* **Model Management** using AWS Step Functions to orchestrate complex workflows for creating, updating, and deleting models, automatically managing underlying ECS infrastructure. -* **Inference Requests** served via both the REST API and the Chat UI, dynamically routing user inputs to the appropriate ECS-hosted models for real-time inference. -* **Chat Interface** enabling users to interact with LISA through a user-friendly web interface, offering seamless real-time model interaction and session continuity. -* **Retrieval-Augmented Generation (RAG) Operations**, leveraging either OpenSearch or PGVector for efficient retrieval of relevant external data to enhance model responses. - ---- - -## LISA Components - -### LISA Model Management -![LISA Model Management Architecture](./assets/LisaModelManagement.png) -The Model Management component is responsible for managing the entire lifecycle of models in LISA. This includes creation, updating, deletion, and scaling of models deployed on ECS. The system automates and scales these operations, ensuring that the underlying infrastructure is managed efficiently. - -* **Model Hosting**: Models are containerized and deployed on AWS ECS, with each model hosted in its own isolated ECS task. This design allows models to be independently scaled based on demand. Traffic to the models is balanced using Application Load Balancers (ALBs), ensuring that the autoscaling mechanism reacts to load fluctuations in real time, optimizing both performance and availability. 
-* **External Model Routing**: LISA utilizes the LiteLLM proxy to route traffic to different model providers, no matter their API and payload format. Users may add models from external providers, such as SageMaker or Bedrock, to their system to allow requests to models hosted in those systems and services. LISA will simply add the configuration to LiteLLM without creating any additional supporting infrastructure. -* **Model Lifecycle Management**: AWS Step Functions are used to orchestrate the lifecycle of models, handling the creation, update, and deletion workflows. Each workflow provisions the required resources using CloudFormation templates, which manage infrastructure components like EC2 instances, security groups, and ECS services. The system ensures that the necessary security, networking, and infrastructure components are automatically deployed and configured. - * The CloudFormation stacks define essential resources using the LISA core VPC configuration, ensuring best practices for security and access across all resources in the environment. - * DynamoDB stores model metadata, while Amazon S3 securely manages model weights, enabling ECS instances to retrieve the weights dynamically during deployment. - -#### Technical Implementation - -* **Model Lifecycle**: Lifecycle operations such as creation, update, and deletion are executed by Step Functions and backed by AWS Lambda in ```lambda/models/lambda_functions.py```. -* **CloudFormation**: Infrastructure components are provisioned using CloudFormation templates, as defined in ```ecs_model_deployer/src/lib/lisa_model_stack.ts```. -* **ECS Cluster**: ECS cluster and task definitions are located in ```ecs_model_deployer/src/lib/ecsCluster.ts```, with model containers specified in ```ecs_model_deployer/src/lib/ecs-model.ts```. - ---- - -### LISA Serve -![LISA Serve Architecture](./assets/LisaServe.png) -LISA Serve is responsible for processing inference requests and serving model predictions. 
This component manages user requests to interact with LLMs and ensures that the models deliver low-latency responses. - -* **Inference Requests**: Requests are routed via ALB, which serves as the main entry point to LISA’s backend infrastructure. The ALB forwards requests to the appropriate ECS-hosted model or externally-hosted model based on the request parameters. For models hosted within LISA, traffic to the models is managed with model-specific ALBs, which enable autoscaling if the models are under heavy load. LISA supports both direct REST API-based interaction and interaction through the Chat UI, enabling programmatic access or a user-friendly chat experience. -* **RAG (Retrieval-Augmented Generation)**: RAG operations enhance model responses by integrating external data sources. LISA leverages OpenSearch or PGVector (PostgreSQL) as vector stores, enabling vector-based search and retrieval of relevant knowledge to augment LLM outputs dynamically. - -#### Technical Implementation - -* RAG operations are managed through ```lambda/rag/lambda_functions.py```, which handles embedding generation and document retrieval via OpenSearch and PostgreSQL. -* Direct requests to the LISA Serve ALB entrypoint must utilize the OpenAI API spec, which we support through the use of the LiteLLM proxy. - ---- - -### LISA Chat -![LISA Chatbot Architecture](./assets/LisaChat.png) -LISA Chat provides a customizable chat interface that enables users to interact with models in real-time. This component ensures that users have a seamless experience for submitting queries and maintaining session continuity. - -* **Chat Interface**: The Chat UI is hosted as a static website on Amazon S3 and is served via API Gateway. Users can interact with models directly through the web-based frontend, sending queries and viewing real-time responses from the models. The interface is integrated with LISA's backend services for model inference, retrieval augmented generation, and session management. 
-* **Session History Management**: LISA maintains session histories using DynamoDB, allowing users to retrieve and continue previous conversations seamlessly. This feature is crucial for maintaining continuity in multi-turn conversations with the models. - -#### Technical Implementation - -* The Chat UI is implemented in the ```lib/user-interface/react/``` folder and is deployed using the scripts in the ```scripts/``` folder. -* Session management logic is handled in ```lambda/session/lambda_functions.py```, where session data is stored and retrieved from DynamoDB. -* RAG operations are defined in lambda/repository/lambda_functions.py - ---- - -## Interaction Flow - -1. **User Interaction with Chat UI or API:** Users can interact with LISA through the Chat UI or REST API. Each interaction is authenticated using AWS Cognito or OIDC, ensuring secure access. -1. **Request Routing:** The API Gateway securely routes user requests to the appropriate backend services, whether for fetching the chat UI, performing RAG operations, or managing models. -1. **Model Management:** Administrators can deploy, update, or delete models via the Model Management API, which triggers ECS deployment and scaling workflows. -1. **Model Inference:** Inference requests are routed to ECS-hosted models or external models via the LiteLLM proxy. Responses are served back to users through the ALB. -1. **RAG Integration:** When RAG is enabled, LISA retrieves relevant documents from OpenSearch or PGVector, augmenting the model's response with external knowledge. -1. **Session Continuity:** User session data is stored in DynamoDB, ensuring that users can retrieve and continue previous conversations across multiple interactions. -1. **Autoscaling:** ECS tasks automatically scale based on system load, with ALBs distributing traffic across available instances to ensure performance. 
- ---- - -# Getting Started with LISA - -LISA (LLM Inference Solution for Amazon Dedicated Cloud) is an advanced infrastructure solution for deploying and -managing Large Language Models (LLMs) on AWS. This guide will walk you through the setup process, from prerequisites -to deployment. - -## Prerequisites - -Before beginning, ensure you have: - -1. An AWS account with appropriate permissions. - 1. Because of all the resource creation that happens as part of CDK deployments, we expect Administrator or Administrator-like permissions with resource creation and mutation permissions. - Installation will not succeed if this profile does not have permissions to create and edit arbitrary resources for the system. - **Note**: This level of permissions is not required for the runtime of LISA, only its deployment and subsequent updates. -2. AWS CLI installed and configured -3. Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles -4. Python 3.9 or later -5. Node.js 14 or later -6. Docker installed and running -7. Sufficient disk space for model downloads and conversions - -If you're new to CDK, review the [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) and consult with your AWS support team. - -> [!TIP] -> To minimize version conflicts and ensure a consistent deployment environment, it is recommended to execute the following steps on a dedicated EC2 instance. However, LISA can be deployed from any machine that meets the prerequisites listed above. 
- ---- - -## Step 1: Clone the Repository - -Ensure you're working with the latest stable release of LISA: - -```bash -git clone -b main --single-branch -cd lisa -``` - ---- - -## Step 2: Set Up Environment Variables - -Create and configure your `config.yaml` file: - -```bash -cp example_config.yaml config.yaml -``` - -Set the following environment variables: - -```bash -export PROFILE=my-aws-profile # Optional, can be left blank -export DEPLOYMENT_NAME=my-deployment -export ENV=dev # Options: dev, test, or prod -``` - ---- - -## Step 3: Set Up Python and TypeScript Environments - -Install system dependencies and set up both Python and TypeScript environments: - -```bash -# Install system dependencies -sudo apt-get update -sudo apt-get install -y jq - -# Install Python packages -pip3 install --user --upgrade pip -pip3 install yq huggingface_hub s5cmd - -# Set up Python environment -make createPythonEnvironment - -# Activate your python environment -# (The command is the output from the previous make command) - -# Install Python Requirements -make installPythonRequirements - -# Set up TypeScript environment -make createTypeScriptEnvironment -make installTypeScriptRequirements -``` - ---- - -## Step 4: Configure LISA - -Edit the `config.yaml` file to customize your LISA deployment. Key configurations include: - -- AWS account and region settings -- Model configurations -- Authentication settings -- Networking and infrastructure preferences - ---- - -## Step 5: Stage Model Weights - -LISA requires model weights to be staged in the S3 bucket specified in your `config.yaml` file, assuming the S3 bucket follows this structure: - -``` -s3:/// -s3://// -s3://// -... -s3:/// -``` - -**Example:** - -``` -s3:///mistralai/Mistral-7B-Instruct-v0.2 -s3:///mistralai/Mistral-7B-Instruct-v0.2/ -s3:///mistralai/Mistral-7B-Instruct-v0.2/ -... 
-``` - -To automatically download and stage the model weights defined by the `ecsModels` parameter in your `config.yaml`, use the following command: - -```bash -make modelCheck -``` - -This command verifies if the model's weights are already present in your S3 bucket. If not, it downloads the weights, converts them to the required format, and uploads them to your S3 bucket. Ensure adequate disk space is available for this process. - -> **WARNING** -> As of LISA 3.0, the `ecsModels` parameter in `config.yaml` is solely for staging model weights in your S3 bucket. Previously, before models could be managed through the [API](https://github.com/awslabs/LISA/blob/develop/README.md#creating-a-model-admin-api) or via the Model Management section of the [Chatbot](https://github.com/awslabs/LISA/blob/develop/README.md#chatbot-example), this parameter also dictated which models were deployed. - -> **NOTE** -> For air-gapped systems, before running `make modelCheck` you should manually download model artifacts and place them in a `models` directory at the project root, using the structure: `models/`. - -> **NOTE** -> This process is primarily designed and tested for HuggingFace models. For other model formats, you will need to manually create and upload safetensors. - ---- - -## Step 6: Configure Identity Provider - -In the `config.yaml` file, configure the `authConfig` block for authentication. LISA supports OpenID Connect (OIDC) providers such as AWS Cognito or Keycloak. Required fields include: - -- `authority`: URL of your identity provider -- `clientId`: Client ID for your application -- `adminGroup`: Group name for users with model management permissions -- `jwtGroupsProperty`: Path to the groups field in the JWT token -- `additionalScopes` (optional): Extra scopes for group membership information - -#### Cognito Configuration Example: -In Cognito, the `authority` will be the URL to your User Pool. 
As an example, if your User Pool ID, not the name, is `us-east-1_example`, and if it is -running in `us-east-1`, then the URL to put in the `authority` field would be `https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example`. The `clientId` -can be found in your User Pool's "App integration" tab from within the AWS Management Console, and at the bottom of the page, you will see the list of clients -and their associated Client IDs. The ID here is what we will need for the `clientId` field. - - -```yaml -authConfig: - authority: https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example - clientId: your-client-id - adminGroup: AdminGroup - jwtGroupsProperty: cognito:groups -``` - -#### Keycloak Configuration Example: -In Keycloak, the `authority` will be the URL to your Keycloak server. The `clientId` is likely not a random string like in the Cognito clients, and instead -will be a string configured by your Keycloak administrator. Your administrator will be able to give you a client name or create a client for you to use for -this application. Once you have this string, use that as the `clientId` within the `authConfig` block. - -```yaml -authConfig: - authority: https://your-keycloak-server.com - clientId: your-client-name - adminGroup: AdminGroup - jwtGroupsProperty: realm_access.roles -``` - ---- - -## Step 7: Configure LiteLLM -We utilize LiteLLM under the hood to allow LISA to respond to the [OpenAI specification](https://platform.openai.com/docs/api-reference). -For LiteLLM configuration, a key must be set up so that the system may communicate with a database for tracking all the models that are added or removed -using the [Model Management API](#admin-level-model-management-api). The key must start with `sk-` and then can be any arbitrary string. We recommend generating a new UUID and then using that as -the key. Configuration example is below. 
- - -```yaml -litellmConfig: - general_settings: - master_key: sk-00000000-0000-0000-0000-000000000000 # needed for db operations, create your own key # pragma: allowlist-secret - model_list: [] -``` - -**Note**: It is possible to add LiteLLM-only models to this configuration, but it is not recommended as the models in this configuration will not show in the -Chat or Model Management UIs. Instead, use the [Model Management UI](#admin-level-model-management-api) to add or remove LiteLLM-only model configurations. - ---- - -## Step 8: Set Up SSL Certificates (Development Only) - -**WARNING: THIS IS FOR DEV ONLY** -When deploying for dev and testing you can use a self-signed certificate for the REST API ALB. You can create this by using the script: `gen-certs.sh` and uploading it to `IAM`. - -```bash -export REGION= -./scripts/gen-certs.sh -aws iam upload-server-certificate --server-certificate-name --certificate-body file://scripts/server.pem --private-key file://scripts/server.key -``` - -Update your `config.yaml` with the certificate ARN: - -```yaml -restApiConfig: - loadBalancerConfig: - sslCertIamArn: arn:aws:iam:::server-certificate/ -``` ---- - -## Step 9: Customize Model Deployment - -In the `ecsModels` section of `config.yaml`, allow our deployment process to pull the model weights for you. - -During the deployment process, LISA will optionally attempt to download your model weights if you specify an optional `ecsModels` -array. This will only work in non-ADC regions. Specifically, see the `ecsModels` section of the [example_config.yaml](./example_config.yaml) file. 
-Here we define the model name, inference container, and baseImage: - -```yaml -ecsModels: - - modelName: your-model-name - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 -``` - ---- - -## Step 10: Bootstrap CDK (If Not Already Done) - -If you haven't bootstrapped your AWS account for CDK: - -```bash -make bootstrap -``` - ---- - -## Recommended LiteLLM Configuration Options - -While LISA is designed to be flexible, configuring external models requires careful consideration. The following guide -provides a recommended minimal setup for integrating various model types with LISA using LiteLLM. - -### Configuration Overview - -This example configuration demonstrates how to set up: -1. A SageMaker Endpoint -2. An Amazon Bedrock Model -3. A self-hosted OpenAI-compatible text generation model -4. A self-hosted OpenAI-compatible embedding model - -**Note:** Ensure that all endpoints and models are in the same AWS region as your LISA installation. - -### SageMaker Endpoints and Bedrock Models - -LISA supports adding existing SageMaker Endpoints and Bedrock Models to the LiteLLM configuration. As long as these -services are in the same region as the LISA installation, LISA can use them alongside any other deployed models. - -**To use a SageMaker Endpoint:** -1. Install LISA without initially referencing the SageMaker Endpoint. -2. Create a SageMaker Model using the private subnets of the LISA deployment. -3. This setup allows the LISA REST API container to communicate with any Endpoint using that SageMaker Model. - -**SageMaker Endpoints and Bedrock Models can be configured:** -- Statically at LISA deployment time -- Dynamically using the LISA Model Management API - -**Important:** Endpoints or Models statically defined during LISA deployment cannot be removed or updated using the -LISA Model Management API, and they will not show in the Chat UI. These will only show as part of the OpenAI `/models` API. 
-Although there is support for it, we recommend using the [Model Management API](#admin-level-model-management-api) instead of the following static configuration. - -### Example Configuration - -```yaml -dev: - litellmConfig: - litellm_settings: - telemetry: false # Disable telemetry to LiteLLM servers (recommended for VPC deployments) - drop_params: true # Ignore unrecognized parameters instead of failing - - model_list: - # 1. SageMaker Endpoint Configuration - - model_name: test-endpoint # Human-readable name, can be anything and will be used for OpenAI API calls - litellm_params: - model: sagemaker/test-endpoint # Prefix required for SageMaker Endpoints and "test-endpoint" matches Endpoint name - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 2. Amazon Bedrock Model Configuration - - model_name: bedrock-titan-express # Human-readable name for future OpenAI API calls - litellm_params: - model: bedrock/amazon.titan-text-express-v1 # Prefix required for Bedrock Models, and exact name of Model to use - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 3. Custom OpenAI-compatible Text Generation Model - - model_name: custom-openai-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/custom-provider/textgen-model # Format: openai// - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: textgen - streaming: true - - # 4. 
Custom OpenAI-compatible Embedding Model - - model_name: custom-openai-embedding-model # Used in future OpenAI-compatible calls to LiteLLM - litellm_params: - model: openai/modelProvider/modelName # Prefix required for OpenAI-compatible models followed by model provider and name details - api_base: https://your-domain-here:443/v1 # Your model's base URI - api_key: ignored # Provide an ignorable placeholder key to avoid LiteLLM deployment failures - lisa_params: - model_type: embedding -``` - ---- - -# API Usage Overview - -LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. - -## API Gateway and ALB Endpoints - -LISA uses two primary APIs for model management: - -1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. -2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. - -### LiteLLM Routing in All Models - -Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. 
- -## User-facing OpenAI-Compatible API - -The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. - -### Listing Models - -The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. - -#### Request Example: - -```bash -curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models -``` - -#### Response Example: - -```json -{ - "data": [ - { - "id": "bedrock-embed-text-v2", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "titan-express-v1", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - }, - { - "id": "sagemaker-amazon-mistrallite", - "object": "model", - "created": 1677610602, - "owned_by": "openai" - } - ], - "object": "list" -} -``` - -#### Explanation of Response Fields: - -These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). - -- `id`: A unique identifier for the model. -- `object`: The type of object, which is "model" in this case. -- `created`: A Unix timestamp representing when the model was created. -- `owned_by`: The entity responsible for the model, such as "openai." - -## Admin-level Model Management API - -This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. - -### Listing Models (Admin API) - -The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. 
Models can be deployed via ECS or managed externally through a LiteLLM configuration. - -#### Request Example: - -```bash -curl -s -H "Authorization: Bearer " -X GET https:///models -``` - -#### Response Example: - -```json -{ - "models": [ - { - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "healthCheckConfig": { - "command": [ - "CMD-SHELL", - "exit 0" - ], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - }, - "environment": { - "MAX_TOTAL_TOKENS": "2048", - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024" - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - }, - "instanceType": "g5.xlarge", - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "modelUrl": null, - "status": "Creating", - "streaming": true - }, - { - "autoScalingConfig": null, - "containerConfig": null, - "loadBalancerConfig": null, - "instanceType": null, - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "modelUrl": null, - "status": "InService", - "streaming": true - } - ] -} -``` - -#### Explanation of Response Fields: - -- `modelId`: A unique identifier for the model. -- `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). -- `status`: The current state of the model, e.g., "Creating," "InService," or "Failed." -- `streaming`: Whether the model supports streaming inference. 
-- `instanceType` (optional): The instance type if the model is deployed via ECS. - -### Creating a Model (Admin API) - -LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). - -This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is -now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. - -#### Request Example: - -``` -POST https:///models -``` - -#### Example Payload for ECS Model: - -```json -{ - "modelId": "mistral-vllm", - "modelName": "mistralai/Mistral-7B-Instruct-v0.2", - "modelType": "textgen", - "inferenceContainer": "vllm", - "instanceType": "g5.xlarge", - "streaming": true, - "containerConfig": { - "image": { - "baseImage": "vllm/vllm-openai:v0.5.0", - "type": "asset" - }, - "sharedMemorySize": 2048, - "environment": { - "MAX_CONCURRENT_REQUESTS": "128", - "MAX_INPUT_LENGTH": "1024", - "MAX_TOTAL_TOKENS": "2048" - }, - "healthCheckConfig": { - "command": ["CMD-SHELL", "exit 0"], - "interval": 10, - "startPeriod": 30, - "timeout": 5, - "retries": 3 - } - }, - "autoScalingConfig": { - "minCapacity": 1, - "maxCapacity": 1, - "cooldown": 420, - "defaultInstanceWarmup": 180, - "metricConfig": { - "albMetricName": "RequestCountPerTarget", - "targetValue": 30, - "duration": 60, - "estimatedInstanceWarmup": 330 - } - }, - "loadBalancerConfig": { - "healthCheckConfig": { - "path": "/health", - "interval": 60, - "timeout": 30, - "healthyThresholdCount": 2, - "unhealthyThresholdCount": 10 - } - } -} -``` - -#### Creating a LiteLLM-Only Model: - -```json -{ - "modelId": "titan-express-v1", - "modelName": "bedrock/amazon.titan-text-express-v1", - "modelType": "textgen", - "streaming": true -} -``` - -#### 
Explanation of Key Fields for Creation Payload: - -- `modelId`: The unique identifier for the model. This is any name you would like it to be. -- `modelName`: The name of the model as it appears in the system. For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise - this is the LiteLLM-compatible reference to a SageMaker Endpoint or Bedrock Foundation Model. Note: Bedrock and SageMaker resources must exist in the - same region as your LISA deployment. If your LISA installation is in us-east-1, then all SageMaker and Bedrock calls will also happen in us-east-1. - Configuration examples: - - LISA hosting: If your model artifacts are in `s3://${lisa_models_bucket}/path/to/model/weights`, then the `modelName` value here should be `path/to/model/weights` - - LiteLLM-only, Bedrock: If you want to use `amazon.titan-text-lite-v1`, your `modelName` value should be `bedrock/amazon.titan-text-lite-v1` - - LiteLLM-only, SageMaker: If you want to use a SageMaker Endpoint named `my-sm-endpoint`, then the `modelName` value should be `sagemaker/my-sm-endpoint`. -- `modelType`: The type of model, such as text generation (textgen). -- `streaming`: Whether the model supports streaming inference. -- `instanceType`: The type of EC2 instance to be used (only applicable for ECS models). -- `containerConfig`: Details about the Docker container, memory allocation, and environment variables. -- `autoScalingConfig`: Configuration related to ECS autoscaling. -- `loadBalancerConfig`: Health check configuration for load balancers. - -### Deleting a Model (Admin API) - -Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. - -#### Request Example: - -``` -DELETE https:///models/{modelId} -``` - -#### Response Example: - -```json -{ - "status": "success", - "message": "Model mistral-vllm has been deleted successfully." 
-} -``` - -### Updating a Model - -LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated -in the cases that the models were originally created with the wrong parameters. For example, if an embedding model was accidentally created as a `textgen` -model, the UpdateModel API can be used to set it to the intended `embedding` value. Additionally, for LISA-hosted models, users may update the AutoScaling -configuration to increase or decrease capacity usage for each model. Users may use this API to completely shut down all instances behind a model until -they want to add capacity back to the model for usage later. This feature can help users to effectively manage costs so that instances do not have to stay -running in time periods of little or no expected usage. - -The UpdateModel API has mutually exclusive payload fields to avoid conflicting requests. The API does not allow for shutting off a model at the same time -as updating its AutoScaling configuration, as these would introduce ambiguous intents. The API does not allow for setting AutoScaling limits to 0 and instead -requires the usage of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type -or streaming compatibility, can happen in either type of update or simply by themselves. - -#### Request Example - -``` -PUT https:///models/{modelId} -``` - -#### Example Payloads - -##### Update Model Metadata - -This payload will simply update the model metadata, which will complete within seconds of invoking. If setting a model as an `embedding` model, then the -`streaming` option must be set to `false` or omitted as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options -may be included in any other update request. 
- -```json -{ - "streaming": true, - "modelType": "textgen" -} -``` - -##### Update AutoScaling Configuration - -This payload will update the AutoScaling configuration for minimum, maximum, and desired number of instances. The desired number must be between the -minimum and maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has fewer instances than the minimum number, then -the desired count will automatically be raised to the minimum if a desired count is not specified. Despite setting a desired capacity, the model will scale down -to the minimum number over time if you are not hitting the scaling thresholds set when creating the model in the first place. - -The AutoScaling configuration **can** be updated while the model is in the Stopped state, but it won't be applied immediately. Instead, the configuration will -be saved until the model is started again, at which point it will use the most recently updated AutoScaling configuration. - -The request will fail if the `autoScalingInstanceConfig` is defined at the same time as the `enabled` field. These options are mutually exclusive and must be -handled as separate operations. Any or all of the options within the `autoScalingInstanceConfig` may be set as needed, so if you only wish to change the `desiredCapacity`, -then that is the only option that you need to specify in the request object within the `autoScalingInstanceConfig`. - -```json -{ - "autoScalingInstanceConfig": { - "minCapacity": 2, - "maxCapacity": 4, - "desiredCapacity": 3 - } -} -``` - -##### Stop Model - Scale Down to 0 Instances - -This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model -with no capacity. This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again -in the future. 
- -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": false -} -``` - -##### Start Model - Restore Previous AutoScaling Configuration - -After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then -adding the reference back to LiteLLM so that users may query the model again. This is expected to be a much faster operation than creating the model through the CreateModel -API, so as long as the model details don't have to change, this in combination with the Stop payload will help to manage costs while still providing model availability as -quickly as the system can spin it up again. - -The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be -handled as separate operations. - -```json -{ - "enabled": true -} -``` - ---- - -# Error Handling for API Requests - -In the LISA model management API, error handling is designed to ensure robustness and consistent responses when errors occur during the execution of API requests. This section provides a detailed explanation of the error handling mechanisms in place, including the types of errors that are managed, how they are raised, and what kind of responses clients can expect when these errors occur. - -## Common Errors and Their HTTP Responses - -Below is a list of common errors that can occur in the system, along with the HTTP status codes and response structures that are returned to the client. - -### ModelNotFoundError - -* **Description**: Raised when a model that is requested for retrieval or deletion is not found in the system. 
-* **HTTP Status Code**: `404 Not Found` -* **Response Body**: - -```json -{ - "error": "ModelNotFoundError", - "message": "The requested model with ID could not be found." -} -``` - -* **Example Scenario**: When a client attempts to fetch details of a model that does not exist in the database, the `ModelNotFoundError` is raised. - -### ModelAlreadyExistsError - -* **Description:** Raised when a request to create a model is made, but the model already exists in the system. -* **HTTP Status Code**: `400` -* **Response Body**: - -```json -{ - "error": "ModelAlreadyExistsError", - "message": "A model with the given configuration already exists." -} -``` - -* **Example Scenario:** A client attempts to create a model with an ID or name that already exists in the database. The system detects the conflict and raises the `ModelAlreadyExistsError`. - -### InvalidInputError (Hypothetical Example) - -* **Description**: Raised when the input provided by the client for creating or updating a model is invalid or does not conform to expected formats. -* **HTTP Status Code**: `400 Bad Request` -* **Response Body**: - -```json -{ - "error": "InvalidInputError", - "message": "The input provided is invalid. Please check the required fields and formats." -} -``` - -* **Example Scenario**: The client submits a malformed JSON body or omits required fields in a model creation request, triggering an `InvalidInputError`. - -## Handling Validation Errors - -Validation errors are handled across the API via utility functions and model transformation logic. These errors typically occur when user inputs fail validation checks or when required data is missing from a request. - -### Example Response for Validation Error: - -* **HTTP Status Code**: `422 Unprocessable Entity` -* **Response Body**: - -```json -{ - "error": "ValidationError", - "message": "The input provided does not meet the required validation criteria." 
-} -``` - ---- - -# Deployment -## Using pre-built resources - -A default configuration will build the necessary containers, lambda layers, and production optimized -web application at build time. In the event that you would like to use pre-built resources due to -network connectivity reasons or other concerns with the environment where you'll be deploying LISA -you can do so. - -- For ECS containers (Models, APIs, etc) you can modify the `containerConfig` block of - the corresponding entry in `config.yaml`. For container images you can provide a path to a directory - from which a docker container will be built (default), a path to a tarball, an ECR repository arn and - optional tag, or a public registry path. - - We provide immediate support for HuggingFace TGI and TEI containers and for vLLM containers. The `example_config.yaml` - file provides examples for TGI and TEI, and the only difference for using vLLM is to change the - `inferenceContainer`, `baseImage`, and `path` options, as indicated in the snippet below. All other options can - remain the same as the model definition examples we have for the TGI or TEI models. vLLM can also support embedding - models in this way, so all you need to do is refer to the embedding model artifacts and remove the `streaming` field - to deploy the embedding model. - - vLLM has support for the OpenAI Embeddings API, but model support for it is limited because the feature is new. Currently, - the only supported embedding model with vLLM is [intfloat/e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct), - but this list is expected to grow over time as vLLM updates. - ```yaml - ecsModels: - - modelName: your-model-name - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - ``` -- If you are deploying the LISA Chat User Interface you can optionally specify the path to the pre-built - website assets using the top level `webAppAssetsPath` parameter in `config.yaml`. 
Specifying this path - (typically `lib/user-interface/react/dist`) will avoid using a container to build and bundle the assets - at CDK build time. -- For the lambda layers you can specify the path to a local zip archive of the layer code by including - the optional `lambdaLayerAssets` block in `config.yaml` similar to the following: - -``` -lambdaLayerAssets: - authorizerLayerPath: lib/core/layers/authorizer_layer.zip - commonLayerPath: lib/core/layers/common_layer.zip - fastapiLayerPath: /path/to/fastapi_layer.zip - sdkLayerPath: lib/rag/layers/sdk_layer.zip -``` ---- - -## Deploying - -Now that we have everything setup we are ready to deploy. - -```bash -make deploy -``` - -By default, all stacks will be deployed but a particular stack can be deployed by providing the `STACK` argument to the `deploy` target. - -```bash -make deploy STACK=LisaServe -``` - -Available stacks can be listed by running: - -```bash -make listStacks -``` - -After the `deploy` command is run, you should see many docker build outputs and eventually a CDK progress bar. The deployment should take about 10-15 minutes and will produce a single cloud formation output for the websocket URL. - -You can test the deployment with the integration test: - -```bash -pytest lisa-sdk/tests --url --verify | false -``` - ---- - -## Programmatic API Tokens - -The LISA Serve ALB can be used for programmatic access outside the example Chat application. -An example use case would be for allowing LISA to serve LLM requests that originate from the [Continue VSCode Plugin](https://www.continue.dev/). -To facilitate communication directly with the LISA Serve ALB, a user with sufficient DynamoDB PutItem permissions may add -API keys to the APITokenTable, and once created, a user may make requests by including the `Authorization: Bearer ${token}` -header or the `Api-Key: ${token}` header with that token. 
If using any OpenAI-compatible library, the `api_key` fields -will use the `Authorization: Bearer ${token}` format automatically, so there is no need to include additional headers -when using those libraries. - -### Adding a Token - -An account owner may create a long-lived API Token using the following AWS CLI command. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" # change to a unique string for a user -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{"token": {"S": "'${token_string}'"}}' -``` - -If an account owner wants the API Token to be temporary and expire after a specific date, LISA will allow for this too. -In addition to the `token` field, the owner may specify the `tokenExpiration` field, which accepts a UNIX timestamp, -in seconds. The following command shows an example of how to do this. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" -token_expiration=$(echo $(date +%s) + 3600 | bc) # token that expires in one hour, 3600 seconds -aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --item '{ - "token": {"S": "'${token_string}'"}, - "tokenExpiration": {"N": "'${token_expiration}'"} - }' -``` - -Once the token is inserted into the DynamoDB Table, a user may use the token in the `Authorization` request header like -in the following snippet. - -```bash -lisa_serve_rest_url="https://" -token_string="YOUR_STRING_HERE" -curl ${lisa_serve_rest_url}/v2/serve/models \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -H "Authorization: Bearer ${token_string}" -``` - -### Updating a Token - -In the case that an owner wishes to change an existing expiration time or add one to a key that did not previously have -an expiration, this can be accomplished by editing the existing item. 
The following commands can be used as an example -for updating an existing token. Setting the expiration time to a time in the past will effectively remove access for -that key. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" -token_expiration=$(echo $(date +%s) + 600 | bc) # token that expires in 10 minutes from now -aws --region $AWS_REGION dynamodb update-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --key '{"token": {"S": "'${token_string}'"}}' \ - --update-expression 'SET tokenExpiration=:t' \ - --expression-attribute-values '{":t": {"N": "'${token_expiration}'"}}' -``` - -### Removing a Token - -Tokens will not be automatically removed even if they are no longer valid. An owner may remove an key, expired or not, -from the database to fully revoke the key, by deleting the item. As an example, the following commands can be used to -remove a token. - -```bash -AWS_REGION="us-east-1" # change to your deployment region -token_string="YOUR_STRING_HERE" # change to the token to remove -aws --region $AWS_REGION dynamodb delete-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ - --key '{"token": {"S": "'${token_string}'"}}' -``` - ---- - -## Model Compatibility - -### HuggingFace Generation Models - -For generation models, or causal language models, LISA supports models that are supported by the underlying serving container, TGI. TGI divides compatibility into two categories: optimized models and best effort supported models. The list of optimized models is found [here](https://huggingface.co/docs/text-generation-inference/supported_models). 
The best effort uses the `transformers` codebase under-the-hood and so should work for most causal models on HuggingFace: - -```python -AutoModelForCausalLM.from_pretrained(, device_map="auto") -``` - -or - -```python -AutoModelForSeq2SeqLM.from_pretrained(, device_map="auto") -``` - -### HuggingFace Embedding Models - -Embedding models often utilize custom codebases and are not as uniform as generation models. For this reason you will likely need to create a new `inferenceContainer`. Follow the [example](./lib/ecs-model/embedding/instructor) provided for the `instructor` model. - -### vLLM Models - -In addition to the support we have for the TGI and TEI containers, we support hosting models using the [vLLM container](https://docs.vllm.ai/en/latest/). vLLM abides by the OpenAI specification, and as such allows both text generation and embedding on the models that vLLM supports. -See the [deployment](#deployment) section for details on how to set up the vLLM container for your models. Similar to how the HuggingFace containers will serve safetensor weights downloaded from the -HuggingFace website, vLLM will do the same, and our configuration will allow you to serve these artifacts automatically. vLLM does not have many supported models for embeddings, but as they become available, -LISA will support them as long as the vLLM container version is updated in the config.yaml file and as long as the model's safetensors can be found in S3. - ---- - -# Chatbot Example - -This repository include an example chatbot web application. The react based web application can be optionally deployed to demonstrate the capabilities of LISA Serve. The chatbot consists of a static react based single page application hosted via API GW S3 proxy integration. The app connects to the LISA Serve REST API and an optional RAG API. The app integrates with an OIDC compatible IdP and allows users to interact directly with any of the textgen models hosted with LISA Serve. 
If the optional RAG stack is deployed then users can also leverage the embeddings models and AWS OpenSearch or PGVector to demonstrate chat with RAG. Chat sessions are maintained in dynamodb table and a number of parameters are exposed through the UI to allow experimentation with various parameters including prompt, temperature, top k, top p, max tokens, and more. - -## Local development - -### Configuring Pre-Commit Hooks - -To ensure code quality and consistency, this project uses pre-commit hooks. These hooks are configured to perform checks, such as linting and formatting, helping to catch potential issues early. These hooks are run automatically on each push to a remote branch but if you wish to run them locally before each commit, follow these steps: - -1. Install pre-commit: `pip install pre-commit` -2. Install the git hook scripts: `pre-commit install` - -The hooks will now run automatically on changed files but if you wish to test them against all files, run the following command: `pre-commit run --all-files`. 
- -### Run REST API locally - -``` -cd lib/serve/rest-api -pip install -r src/requirements.txt -export AWS_REGION= -export AUTHORITY= -export CLIENT_ID= -export REGISTERED_MODELS_PS_NAME= -export TOKEN_TABLE_NAME="/LISAApiTokenTable" -gunicorn -k uvicorn.workers.UvicornWorker -w 2 -b "0.0.0.0:8080" "src.main:app" -``` - -### Run example chatbot locally - -Create `lib/user-interface/react/public/env.js` file with the following contents: - -``` -window.env = { - AUTHORITY: '', - CLIENT_ID: '', - JWT_GROUPS_PROP: '', - ADMIN_GROUP: '', - CUSTOM_SCOPES:[], - // Alternatively you can set this to be your REST api elb endpoint - RESTAPI_URI: 'http://localhost:8080/', - API_BASE_URL: 'https://${deployment_id}.execute-api.${regional_domain}/${deployment_stage}', - RESTAPI_VERSION: 'v2', - "MODELS": [ - { - "model": "streaming-textgen-model", - "streaming": true, - "modelType": "textgen" - }, - { - "model": "non-streaming-textgen-model", - "streaming": false, - "modelType": "textgen" - }, - { - "model": "embedding-model", - "streaming": null, - "modelType": "embedding" - } - ] -} -``` - -Launch the Chat UI: - -``` -cd lib/user-interface/react/ -npm run dev -``` ---- - -# Usage and Features - -The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do that. The Serve endpoint -will still validate user auth, so if you have a Bearer token from the IdP configured with LISA, we will honor it, or if you've set up an API -token using the [DynamoDB instructions](#programmatic-api-tokens), we will also accept that. This diagram shows the LISA Serve components that -would be utilized during direct REST API requests. - -## OpenAI Specification Compatibility - -We now provide greater support for the [OpenAI specification](https://platform.openai.com/docs/api-reference) for model inference and embeddings. 
-We utilize LiteLLM as a proxy for both models we spin up on behalf of the user and additional models configured through the config.yaml file, and because of that, the -LISA REST API endpoint allows for a central location for making text generation and embeddings requests. We support, and are not limited to, the following popular endpoint -routes as long as your underlying models can also respond to them. - -- /models -- /chat/completions -- /completions -- /embeddings - -By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM applications and workflows. In LISA, users can authenticate -using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as described [here](#programmatic-api-tokens). Once the token -is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the 'Authorization' header, otherwise if using the API token, -either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` will handle the OpenAI API calls. As an example, the following call can list all -models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. - -```shell -curl -s -H 'Api-Key: your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -If using the IdP, the request would look like the following: - -```shell -curl -s -H 'Authorization: Bearer your-token' -X GET https://${lisa_serve_alb}/v2/serve/models -``` - -When using a library that requests an OpenAI-compatible base_url, you can provide `https://${lisa_serve_alb}/v2/serve` here. All of the OpenAI routes will -automatically be added to the base URL, just as we appended `/models` to the `/v2/serve` route for listing all models tracked by LISA. 
- ---- - -## Continue JetBrains and VS Code Plugin - -For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider for the [Continue plugin](https://www.continue.dev). -To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. In this list, add the following block, replacing the placeholder URL -with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration requires an API token as created through the [DynamoDB workflow](#programmatic-api-tokens). - -```json -{ - "model": "AUTODETECT", - "title": "LISA", - "apiBase": "https:///v2/serve", - "provider": "openai", - "apiKey": "your-api-token" // pragma: allowlist-secret -} -``` - -Once you save the `config.json` file, the Continue plugin will call the `/models` API to get a list of models at your disposal. The ones provided by LISA will be prefaced -with "LISA" or with the string you place in the `title` field of the config above. Once the configuration is complete and a model is selected, you can use that model to -generate code and perform AI assistant tasks within your development environment. See the [Continue documentation](https://docs.continue.dev/how-to-use-continue) for more -information about its features, capabilities, and usage. - -### Usage in LLM Libraries - -If your workflow includes using libraries, such as [LangChain](https://python.langchain.com/v0.2/docs/introduction/) or [OpenAI](https://github.com/openai/openai-python), -then you can place LISA right in your application by changing only the endpoint and headers for the client objects. As an example, using the OpenAI library, the client would -normally be instantiated and invoked with the following block. 
- -```python -from openai import OpenAI - -client = OpenAI( - api_key="my_key" # pragma: allowlist-secret not a real key -) -client.models.list() -``` - -To use the models being served by LISA, the client needs only a few changes: - -1. Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in the [Continue example](#continue-jetbrains-and-vs-code-plugin) -2. Add the API key that you generated from the [token generation steps](#programmatic-api-tokens) as your `api_key` field. -3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public cert, then this may be omitted. -1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is provided in the `RESTAPI_SSL_CERT_ARN` environment variable. - -The Code block will now look like this and you can continue to use the library without any other modifications. - -```python -# for self-signed certificates -import boto3 -from lisapy.utils import get_cert_path -# main client library -from openai import DefaultHttpxClient, OpenAI - -iam_client = boto3.client("iam") -cert_path = get_cert_path(iam_client) - -client = OpenAI( - api_key="my_key", # pragma: allowlist-secret not a real key - base_url="https:///v2/serve", - http_client=DefaultHttpxClient(verify=cert_path), # needed for self-signed certs on your ALB, can be omitted otherwise -) -client.models.list() -``` - ---- - -# License Notice - -Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it uses -the third party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license. 
+[![Full Documentation](https://img.shields.io/badge/Full%20Documentation-blue?style=for-the-badge&logo=Vite&logoColor=white)](https://awslabs.github.io/LISA/) + +## What is LISA? + +LISA is an infrastructure-as-code solution providing scalable, low-latency access to customers’ generative LLMs and +embedding language models. LISA accelerates and supports customers’ GenAI experimentation and adoption, particularly in +regions where Amazon Bedrock is not available. LISA allows customers to move quickly rather than independently solve the +undifferentiated heavy lifting of hosting and inference architecture. Customers deploy LISA into a single AWS account +and integrate it with an identity provider. Customers bring their own models to LISA for self-hosting and inference +supported by Amazon Elastic Container Service (ECS). Model configuration is managed through LISA’s model management +APIs. + +As use cases and model requirements grow, customers can configure LISA with external model providers. Through OpenAI's +API spec via the LiteLLM proxy, LISA is compatible with 100+ models from various providers, including Amazon Bedrock and +Amazon SageMaker JumpStart. LISA customers can centralize communication across many model providers via LiteLLM, leveraging LISA +for model orchestration. Using LISA as a model orchestration layer allows customers to standardize integrations with +externally hosted models in a single place. Without an orchestration layer, customers must individually manage unique +API integrations with each provider. + +## Key Features + +* **Self Host Models:** Bring your own text generation and embedding models to LISA for hosting and inference. +* **Model Orchestration:** Centralize and standardize configuration with 100+ models from model providers via LiteLLM, + including Amazon Bedrock models.
+* **Chatbot User Interface:** Through the chatbot user interface, users can prompt LLMs, receive responses, modify prompt + templates, change model arguments, and manage their session history. Administrators can control available features via + the configuration page. +* **Retrieval-augmented generation (RAG):** RAG reduces the need for fine-tuning, an expensive and time-consuming + undertaking, and delivers more contextually relevant outputs. LISA offers RAG through Amazon OpenSearch or + PostgreSQL’s PGVector extension on Amazon RDS. +* **Non-RAG Model Context:** Users can upload documents to their chat sessions to enhance responses or support use cases + like document summarization. +* **Model Management:** Administrators can add, remove, and update models configured with LISA through the model management + configuration page or APIs. +* **OpenAI API spec:** LISA can be configured with compatible tooling. For example, customers can configure LISA as the + model provider for the [Continue](https://www.continue.dev/) plugin, an open-source AI code assistant for JetBrains and Visual Studio Code + integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM + prompting directly in their IDE. +* **Libraries:** If your workflow includes libraries such as [LangChain](https://python.langchain.com/) + or [OpenAI](https://github.com/openai/openai-python), then you can place LISA in your + application by changing only the endpoint and headers for the client objects. +* **FedRAMP:** The AWS services that LISA leverages are FedRAMP High compliant. +* **Ongoing Releases:** We offer ongoing releases with new functionality. LISA’s roadmap is customer driven.
+ +## Deployment Prerequisites + +### Pre-Deployment Steps + +* Set up and have access to an AWS account with appropriate permissions + * All the resource creation that happens as part of CDK deployments expects Administrator or Administrator-like + permissions with resource creation and mutation permissions. Installation will not succeed if this profile does + not have permissions to create and edit arbitrary resources for the system. Note: This level of permissions is not + required for the runtime of LISA. This is only necessary for deployment and subsequent updates. +* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles +* Optional: If using the chat UI, have your Identity Provider (IdP) information and access +* Optional: Have your VPC information available, if you are using an existing one for your deployment +* Note: CDK and Model Management both leverage AWS Systems Manager (SSM) Parameter Store. Confirm that SSM is approved for use by your organization before beginning. + +### Software + +* AWS CLI installed and configured +* Python 3.9 or later +* Node.js 14 or later +* Docker installed and running +* Sufficient disk space for model downloads and conversions + + +## Getting Started + +For detailed instructions on setting up, configuring, and deploying LISA, please refer to our separate documentation on +installation and usage. + +- [Deployment Guide](lib/docs/admin/getting-started.md) +- [Configuration](lib/docs/config/configuration.md) + +## License + +Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it +uses +the third-party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes +the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license.
diff --git a/VERSION b/VERSION index fd2a0186..944880fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 diff --git a/bin/lisa.ts b/bin/lisa.ts index 94e6385a..fbc6b305 100644 --- a/bin/lisa.ts +++ b/bin/lisa.ts @@ -21,26 +21,18 @@ import * as fs from 'fs'; import * as path from 'path'; import * as cdk from 'aws-cdk-lib'; -import { Aspects } from 'aws-cdk-lib'; -import { AwsSolutionsChecks } from 'cdk-nag'; import * as yaml from 'js-yaml'; +import _ from 'lodash'; import { Config, ConfigFile, ConfigSchema } from '../lib/schema'; import { LisaServeApplicationStage } from '../lib/stages'; -// Read configuration file -const configFilePath = path.join(__dirname, '../config.yaml'); -const configFile = yaml.load(fs.readFileSync(configFilePath, 'utf8')) as ConfigFile; -let configEnv = configFile.env || 'dev'; - -// Select configuration environment -if (process.env.ENV) { - configEnv = process.env.ENV; -} -const configData = configFile[configEnv]; -if (!configData) { - throw new Error(`Configuration for environment "${configEnv}" not found.`); -} +// Read configuration files +const baseConfigFilePath = path.join(__dirname, '../config-base.yaml'); +const customConfigFilePath = path.join(__dirname, '../config-custom.yaml'); +const baseConfigFile = yaml.load(fs.readFileSync(baseConfigFilePath, 'utf8')) as ConfigFile; +const customConfigFile = yaml.load(fs.readFileSync(customConfigFilePath, 'utf8')) as ConfigFile; +const configData = _.merge(baseConfigFile, customConfigFile); // Other command line argument overrides type EnvMapping = [string, keyof Config]; @@ -61,6 +53,7 @@ mappings.forEach(([envVar, configVar]) => { let config: Config; try { config = ConfigSchema.parse(configData); + console.log('MERGED CONFIG FILE:\n' + yaml.dump(config)); } catch (error) { if (error instanceof Error) { console.error('Error parsing the configuration:', error.message); @@ -78,10 +71,6 @@ const env: cdk.Environment = { // Application const app = new cdk.App(); -// Run CDK-nag on 
app if specified -if (config.runCdkNag) { - Aspects.of(app).add(new AwsSolutionsChecks({ reports: true, verbose: true })); -} new LisaServeApplicationStage(app, config.deploymentStage, { env: env, diff --git a/config-base.yaml b/config-base.yaml new file mode 100644 index 00000000..95f25aa0 --- /dev/null +++ b/config-base.yaml @@ -0,0 +1,12 @@ +mountS3DebUrl: https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb +stackSynthesizer: CliCredentialsStackSynthesizer +ragRepositories: + - repositoryId: pgvector-rag + type: pgvector + rdsConfig: + username: postgres +ragFileProcessingConfig: + chunkSize: 512 + chunkOverlap: 51 +litellmConfig: + db_key: sk-a8814208-0388-480c-9fc7-fea59607ca38 diff --git a/ecs_model_deployer/src/lib/ecs-model.ts b/ecs_model_deployer/src/lib/ecs-model.ts index 5710767e..1deb6a7a 100644 --- a/ecs_model_deployer/src/lib/ecs-model.ts +++ b/ecs_model_deployer/src/lib/ecs-model.ts @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -*/ + */ // ECS Model Construct. 
-import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup, IVpc, SubnetSelection } from 'aws-cdk-lib/aws-ec2'; import { AmiHardwareType } from 'aws-cdk-lib/aws-ecs'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { Construct } from 'constructs'; @@ -40,6 +40,7 @@ type ECSModelProps = { modelConfig: ModelConfig; securityGroup: ISecurityGroup; vpc: IVpc; + subnetSelection?: SubnetSelection; } & BaseProps; /** @@ -56,7 +57,7 @@ export class EcsModel extends Construct { */ constructor (scope: Construct, id: string, props: ECSModelProps) { super(scope, id); - const { config, modelConfig, securityGroup, vpc } = props; + const { config, modelConfig, securityGroup, vpc, subnetSelection } = props; const modelCluster = new ECSCluster(scope, `${id}-ECC`, { config, @@ -74,6 +75,7 @@ export class EcsModel extends Construct { }, securityGroup, vpc, + subnetSelection }); // Single bucket for all models diff --git a/ecs_model_deployer/src/lib/ecsCluster.ts b/ecs_model_deployer/src/lib/ecsCluster.ts index f8316aef..9218d374 100644 --- a/ecs_model_deployer/src/lib/ecsCluster.ts +++ b/ecs_model_deployer/src/lib/ecsCluster.ts @@ -18,7 +18,7 @@ import { CfnOutput, Duration, RemovalPolicy } from 'aws-cdk-lib'; import { BlockDeviceVolume, GroupMetrics, Monitoring } from 'aws-cdk-lib/aws-autoscaling'; import { Metric, Stats } from 'aws-cdk-lib/aws-cloudwatch'; -import { InstanceType, ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { InstanceType, ISecurityGroup, IVpc, SubnetSelection } from 'aws-cdk-lib/aws-ec2'; import { Repository } from 'aws-cdk-lib/aws-ecr'; import { AmiHardwareType, @@ -57,6 +57,7 @@ type ECSClusterProps = { ecsConfig: ECSConfig; securityGroup: ISecurityGroup; vpc: IVpc; + subnetSelection?: SubnetSelection; } & BaseProps; /** @@ -79,7 +80,7 @@ export class ECSCluster extends Construct { */ constructor (scope: Construct, id: string, props: ECSClusterProps) { super(scope, id); - const { config, vpc, securityGroup, ecsConfig } 
= props; + const { config, vpc, securityGroup, ecsConfig, subnetSelection } = props; // Create ECS cluster const cluster = new Cluster(this, createCdkId([ecsConfig.identifier, 'Cl']), { @@ -90,6 +91,7 @@ export class ECSCluster extends Construct { // Create auto scaling group const autoScalingGroup = cluster.addCapacity(createCdkId([ecsConfig.identifier, 'ASG']), { + vpcSubnets: subnetSelection, instanceType: new InstanceType(ecsConfig.instanceType), machineImage: EcsOptimizedImage.amazonLinux2(ecsConfig.amiHardwareType), minCapacity: ecsConfig.autoScalingConfig.minCapacity, @@ -102,7 +104,7 @@ export class ECSCluster extends Construct { blockDevices: [ { deviceName: '/dev/xvda', - volume: BlockDeviceVolume.ebs(30, { + volume: BlockDeviceVolume.ebs(ecsConfig.autoScalingConfig.blockDeviceVolumeSize, { encrypted: true, }), }, @@ -285,6 +287,7 @@ export class ECSCluster extends Construct { dropInvalidHeaderFields: true, securityGroup, vpc, + vpcSubnets: subnetSelection, idleTimeout: Duration.seconds(600) }); diff --git a/ecs_model_deployer/src/lib/lisa_model_stack.ts b/ecs_model_deployer/src/lib/lisa_model_stack.ts index 4faa5acf..01b96e7b 100644 --- a/ecs_model_deployer/src/lib/lisa_model_stack.ts +++ b/ecs_model_deployer/src/lib/lisa_model_stack.ts @@ -14,9 +14,9 @@ limitations under the License. */ -import { Stack, StackProps } from 'aws-cdk-lib'; +import { Aspects, CfnResource, IAspect, Stack, StackProps } from 'aws-cdk-lib'; -import { Vpc, SecurityGroup } from 'aws-cdk-lib/aws-ec2'; +import { Vpc, SecurityGroup, Subnet, SubnetSelection } from 'aws-cdk-lib/aws-ec2'; import { Construct } from 'constructs'; import { EcsModel } from './ecs-model'; @@ -30,6 +30,28 @@ export type LisaModelStackProps = { modelConfig: ModelConfig; } & StackProps; +/** + * Modifies all AWS::EC2::LaunchTemplate resources in a CDK application. 
It directly adjusts the synthesized + * CloudFormation template, setting the HttpPutResponseHopLimit within MetadataOptions to 2 and HttpTokens to required. + */ +class UpdateLaunchTemplateMetadataOptions implements IAspect { + /** + * Checks if the given node is an instance of CfnResource and specifically an AWS::EC2::LaunchTemplate resource. + * If both conditions are true, it applies a direct override to the CloudFormation resource's properties, setting + * the HttpPutResponseHopLimit to 2 and HttpTokens to 'required'. + * + * @param {Construct} node - The CDK construct being visited. + */ + public visit (node: Construct): void { + // Check if the node is a CloudFormation resource of type AWS::EC2::LaunchTemplate + if (node instanceof CfnResource && node.cfnResourceType === 'AWS::EC2::LaunchTemplate') { + // Directly modify the CloudFormation properties to include the desired settings + node.addOverride('Properties.LaunchTemplateData.MetadataOptions.HttpPutResponseHopLimit', 2); + node.addOverride('Properties.LaunchTemplateData.MetadataOptions.HttpTokens', 'required'); + } + } +} + export class LisaModelStack extends Stack { constructor (scope: Construct, id: string, props: LisaModelStackProps) { super(scope, id, props); @@ -38,13 +60,24 @@ export class LisaModelStack extends Stack { vpcId: props.vpcId }); + let subnetSelection: SubnetSelection | undefined; + + if (props.config.subnets && props.config.subnets.length > 0) { + subnetSelection = { + subnets: props.config.subnets?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet.subnetId)) + }; + } + const securityGroup = SecurityGroup.fromLookupById(this, `${id}-sg`, props.securityGroupId); new EcsModel(this, `${id}-ecsModel`, { config: props.config, modelConfig: props.modelConfig, securityGroup: securityGroup, - vpc: vpc + vpc: vpc, + subnetSelection: subnetSelection }); + + Aspects.of(this).add(new UpdateLaunchTemplateMetadataOptions()); } } diff --git 
a/ecs_model_deployer/src/lib/schema.ts b/ecs_model_deployer/src/lib/schema.ts index 06b4f972..11e3c5ec 100644 --- a/ecs_model_deployer/src/lib/schema.ts +++ b/ecs_model_deployer/src/lib/schema.ts @@ -417,6 +417,7 @@ const MetricConfigSchema = z.object({ * @property {MetricConfig} metricConfig - Metric configuration for auto scaling. */ const AutoScalingConfigSchema = z.object({ + blockDeviceVolumeSize: z.number().min(30).default(30), minCapacity: z.number().min(1).default(1), maxCapacity: z.number().min(1).default(2), defaultInstanceWarmup: z.number().default(180), @@ -617,6 +618,10 @@ const RawConfigSchema = z instanceProfilePrefix: z.string().optional(), }) .optional(), + subnets: z.array(z.object({ + subnetId: z.string().startsWith('subnet-'), + ipv4CidrBlock: z.string() + })).optional(), }) .refine((config) => (config.pypiConfig.indexUrl && config.region.includes('iso')) || !config.region.includes('iso'), { message: 'Must set PypiConfig if in an iso region', diff --git a/example_config.yaml b/example_config.yaml index 8ac9dd02..dd86e02f 100644 --- a/example_config.yaml +++ b/example_config.yaml @@ -1,152 +1,83 @@ -env: dev - -dev: - appName: lisa - profile: - deploymentName: - accountNumber: 012345678901 - region: us-east-1 - deploymentStage: dev - removalPolicy: destroy - runCdkNag: false - # lambdaLayerAssets: - # authorizerLayerPath: /path/to/authorizer_layer.zip - # commonLayerPath: /path/to/common_layer.zip - # fastapiLayerPath: /path/to/fastapi_layer.zip - # ragLayerPath: /path/to/rag_layer.zip - # sdkLayerPath: /path/to/sdk_layer.zip - # stackSynthesizer: CliCredentialsStackSynthesizer - # permissionsBoundaryAspect: - # permissionsBoundaryPolicyName: CustomPermissionBoundary - # rolePrefix: CustomPrefix - # policyPrefix: CustomPrefix - # instanceProfilePrefix: CustomPrefix - # systemBanner: - # text: 'LISA System' - # backgroundColor: orange - # fontColor: black - s3BucketModels: hf-models-gaiic - # aws partition mountS3 package location - mountS3DebUrl: 
https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb - # aws-iso partition mountS3 package location - # mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb - # aws-iso-b partition mountS3 package location - # mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb - accountNumbersEcr: - - 012345678901 - deployRag: true - deployChat: true - deployUi: true - lambdaConfig: - pythonRuntime: PYTHON_3_10 - logLevel: DEBUG - vpcAutoscalingConfig: - provisionedConcurrentExecutions: 5 - minCapacity: 1 - maxCapacity: 50 - targetValue: 0.80 - cooldown: 30 - authConfig: - authority: - clientId: - adminGroup: - jwtGroupsProperty: - logLevel: DEBUG - # NOTE: The following configuration will allow for using a custom domain for the chat user interface. - # If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL. - # Users must use the custom domain for the user interface to work if this option is populated. 
- apiGatewayConfig: - domainName: - restApiConfig: - apiVersion: v2 - instanceType: m5.large - containerConfig: - image: - baseImage: python:3.9 - path: lib/serve/rest-api - type: asset - healthCheckConfig: - command: ["CMD-SHELL", "exit 0"] - interval: 10 - startPeriod: 30 - timeout: 5 - retries: 3 - autoScalingConfig: - minCapacity: 1 - maxCapacity: 1 - cooldown: 60 - defaultInstanceWarmup: 60 - metricConfig: - AlbMetricName: RequestCountPerTarget - targetValue: 1000 - duration: 60 - estimatedInstanceWarmup: 30 - internetFacing: true - loadBalancerConfig: - sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev - healthCheckConfig: - path: /health - interval: 60 - timeout: 30 - healthyThresholdCount: 2 - unhealthyThresholdCount: 10 - domainName: - ragRepositories: - - repositoryId: pgvector-rag - type: pgvector - rdsConfig: - username: postgres - # - repositoryId: default - # type: opensearch - # opensearchConfig: - # dataNodes: 2 - # dataNodeInstanceType: r6g.large.search - # masterNodes: 0 - # masterNodeInstanceType: r6g.large.search - # volumeSize: 300 - # If adding an existing PGVector database, this configurations assumes: - # 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/ - # 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups) - # 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format that RDS natively provides a password in SecretsManager. - # If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required. 
- # - repositoryId: pgvector-rag - # type: pgvector - # rdsConfig: - # username: postgres - # passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826" - # dbHost: # Host name of database. Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com" - # dbName: postgres - ragFileProcessingConfig: - chunkSize: 512 - chunkOverlap: 51 - ecsModels: - - modelName: mistralai/Mistral-7B-Instruct-v0.2 - inferenceContainer: tgi - baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - - modelName: intfloat/e5-large-v2 - inferenceContainer: tei - baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3 - # - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1 - # inferenceContainer: tgi - # baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 - # LiteLLM Config options found here: https://litellm.vercel.app/docs/proxy/configs#all-settings - # Anything within this config is copied to a configuration for starting LiteLLM in the REST API container. - # It is suggested to put an "ignored" API key so that calls to locally hosted models don't fail on OpenAI calls - # from LiteLLM. - # We added `lisa_params` to add additional metadata for interaction with the Chat UI. Specify if the model is a - # textgen or embedding model, and if it is textgen, specify whether it supports streaming. If embedding, then - # omit the `streaming` parameter. When defining the model list, the `lisa_params` will be an object in the model - # definition that will have the `model_type` and `streaming` fields in it. A commented example is provided below. - litellmConfig: - litellm_settings: - telemetry: false # Don't try to send telemetry to LiteLLM servers. - general_settings: - master_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes - model_list: # Add any of your existing (not LISA-hosted) models here. 
-# - model_name: mymodel -# litellm_params: -# model: openai/myprovider/mymodel -# api_key: ignored -# lisa_params: -# model_type: textgen -# streaming: true +accountNumber: "012345678901" +region: us-east-1 +authConfig: + authority: + clientId: + adminGroup: + jwtGroupsProperty: +s3BucketModels: hf-models-gaiic +########################### OPTIONAL BELOW ####################################### +# profile: AWS CLI profile for deployment. +# vpcId: VPC ID for the application. (e.g. vpc-0123456789abcdef) +# The following is an array of subnet objects for the application. Each contains a subnetId (e.g. subnet-fedcba9876543210) and an ipv4CidrBlock. +# subnets: +# - subnetId: +# ipv4CidrBlock: +# The following configuration will allow for using a custom domain for the chat user interface. +# If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL. +# Users must use the custom domain for the user interface to work if this option is populated. +# apiGatewayConfig: +# domainName: +# restApiConfig: +# sslCertIamArn: ARN of the self-signed cert to be used throughout the system +# Some customers will want to download required libs prior to deployment; if so, provide paths to the zipped resources +# lambdaLayerAssets: +# authorizerLayerPath: /path/to/authorizer_layer.zip +# commonLayerPath: /path/to/common_layer.zip +# fastapiLayerPath: /path/to/fastapi_layer.zip +# ragLayerPath: /path/to/rag_layer.zip +# sdkLayerPath: /path/to/sdk_layer.zip +# stackSynthesizer: CliCredentialsStackSynthesizer +# deploymentPrefix: Prefix for deployment resources. +# webAppAssetsPath: Optional path to precompiled webapp assets. If not specified the web application will be built at deploy time.
+# permissionsBoundaryAspect: +# permissionsBoundaryPolicyName: CustomPermissionBoundary +# rolePrefix: CustomPrefix +# policyPrefix: CustomPrefix +# instanceProfilePrefix: CustomPrefix +# vpcId: vpc-0123456789abcdef +# aws-iso partition mountS3 package location +# mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb +# aws-iso-b partition mountS3 package location +# mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb +# List of AWS account numbers for ECR repositories. +# accountNumbersEcr: +# - "012345678901" +# ragRepositories: +# - repositoryId: pgvector-rag +# type: pgvector +# rdsConfig: +# username: postgres +# - repositoryId: default +# type: opensearch +# opensearchConfig: +# dataNodes: 2 +# dataNodeInstanceType: r6g.large.search +# masterNodes: 0 +# masterNodeInstanceType: r6g.large.search +# volumeSize: 300 +# If adding an existing PGVector database, this configuration assumes: +# 1. The database has been configured to have pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/ +# 2. The database is accessible by RAG-related lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups) +# 3. A secret ID exists in SecretsManager holding the database password within a json block of '{"password":"your_password_here"}'. This is the same format in which RDS natively stores a password in SecretsManager. +# If the passwordSecretId or dbHost are not provided, then a sample database will be created for you. Only the username is required. +# - repositoryId: pgvector-rag +# type: pgvector +# rdsConfig: +# username: postgres +# passwordSecretId: # password ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826" +# dbHost: # Host name of database.
Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com" +# dbName: postgres +# You can optionally provide a list of models and the deployment process will ensure they exist in your model bucket and try to download them if they don't exist +# ecsModels: +# - modelName: mistralai/Mistral-7B-Instruct-v0.2 +# inferenceContainer: tgi +# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 +# - modelName: intfloat/e5-large-v2 +# inferenceContainer: tei +# baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3 +# - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1 +# inferenceContainer: tgi +# baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 +# litellmConfig: +# db_key: sk-d7a77bcb-3e23-483c-beec-2700f2baeeb1 # A key is required for model management purposes - must start with sk- diff --git a/lambda/authorizer/lambda_functions.py b/lambda/authorizer/lambda_functions.py index 1f6ded07..3d535378 100644 --- a/lambda/authorizer/lambda_functions.py +++ b/lambda/authorizer/lambda_functions.py @@ -37,6 +37,7 @@ def lambda_handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: i logger.info("REST API authorization handler started") requested_resource = event["resource"] + request_method = event["httpMethod"] id_token = get_id_token(event) @@ -69,7 +70,10 @@ def lambda_handler(event: Dict[str, Any], context) -> Dict[str, Any]: # type: i username = jwt_data.get("sub", "user") logger.info(f"Deny access to {username} due to non-admin accessing /models api.") return deny_policy - + if requested_resource.startswith("/configuration") and request_method == "PUT" and not is_admin_user: + username = jwt_data.get("sub", "user") + logger.info(f"Deny access to {username} due to non-admin trying to update configuration.") + return deny_policy logger.debug(f"Generated policy: {allow_policy}") logger.info(f"REST API authorization handler completed with 'Allow' for resource {event['methodArn']}") return allow_policy diff --git 
a/lambda/configuration/__init__.py b/lambda/configuration/__init__.py new file mode 100644 index 00000000..4139ae4d --- /dev/null +++ b/lambda/configuration/__init__.py @@ -0,0 +1,13 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/lambda/configuration/lambda_functions.py b/lambda/configuration/lambda_functions.py new file mode 100644 index 00000000..23ffabdf --- /dev/null +++ b/lambda/configuration/lambda_functions.py @@ -0,0 +1,65 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Lambda functions for managing sessions.""" +import json +import logging +import os +import time +from decimal import Decimal +from typing import Any, Dict + +import boto3 +import create_env_variables # noqa: F401 +from botocore.exceptions import ClientError +from utilities.common_functions import api_wrapper, retry_config + +logger = logging.getLogger(__name__) + +dynamodb = boto3.resource("dynamodb", region_name=os.environ["AWS_REGION"], config=retry_config) +table = dynamodb.Table(os.environ["CONFIG_TABLE_NAME"]) + + +@api_wrapper +def get_configuration(event: dict, context: dict) -> Dict[str, Any]: + """List configuration entries by configScope from DynamoDB.""" + config_scope = event["queryStringParameters"]["configScope"] + + response = {} + try: + response = table.query( + KeyConditionExpression="#s = :configScope", + ExpressionAttributeNames={"#s": "configScope"}, + ExpressionAttributeValues={":configScope": config_scope}, + ScanIndexForward=False, + ) + except ClientError as error: + if error.response["Error"]["Code"] == "ResourceNotFoundException": + logger.warning(f"No record found with session id: {config_scope}") + else: + logger.exception("Error fetching session") + return response.get("Items", {}) # type: ignore [no-any-return] + + +@api_wrapper +def update_configuration(event: dict, context: dict) -> None: + """Update configuration in DynamoDB.""" + # from https://stackoverflow.com/a/71446846 + body = json.loads(event["body"], parse_float=Decimal) + body["created_at"] = str(Decimal(time.time())) + + try: + table.put_item(Item=body) + except ClientError: + logger.exception("Error updating session in DynamoDB") diff --git a/lambda/models/domain_objects.py b/lambda/models/domain_objects.py index 46c29027..dfa7143e 100644 --- a/lambda/models/domain_objects.py +++ b/lambda/models/domain_objects.py @@ -91,6 +91,7 @@ class LoadBalancerConfig(BaseModel): class AutoScalingConfig(BaseModel): """Autoscaling configuration upon model creation.""" + 
blockDeviceVolumeSize: Optional[NonNegativeInt] = 30 minCapacity: NonNegativeInt maxCapacity: NonNegativeInt cooldown: PositiveInt @@ -102,6 +103,8 @@ def validate_auto_scaling_config(self) -> Self: """Validate autoScalingConfig values.""" if self.minCapacity > self.maxCapacity: raise ValueError("minCapacity must be less than or equal to the maxCapacity.") + if self.blockDeviceVolumeSize is not None and self.blockDeviceVolumeSize < 30: + raise ValueError("blockDeviceVolumeSize must be greater than or equal to 30.") return self diff --git a/lambda/models/lambda_functions.py b/lambda/models/lambda_functions.py index e39d641d..06aa0dce 100644 --- a/lambda/models/lambda_functions.py +++ b/lambda/models/lambda_functions.py @@ -17,7 +17,10 @@ from typing import Annotated, Union import boto3 +import botocore.session from fastapi import FastAPI, Path, Request +from fastapi.encoders import jsonable_encoder +from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from mangum import Mangum @@ -36,6 +39,7 @@ from .exception import InvalidStateTransitionError, ModelAlreadyExistsError, ModelNotFoundError from .handler import CreateModelHandler, DeleteModelHandler, GetModelHandler, ListModelsHandler, UpdateModelHandler +sess = botocore.session.Session() app = FastAPI(redirect_slashes=False, lifespan="off", docs_url="/docs", openapi_url="/openapi.json") app.add_middleware(AWSAPIGatewayMiddleware) @@ -61,6 +65,14 @@ async def model_not_found_handler(request: Request, exc: ModelNotFoundError) -> return JSONResponse(status_code=404, content={"message": str(exc)}) +@app.exception_handler(RequestValidationError) # type: ignore +async def validation_exception_handler(request: Request, exc: RequestValidationError): + """Handle exception when request fails validation and translate to a 422 error.""" + return JSONResponse( + status_code=422, content={"detail": jsonable_encoder(exc.errors()),
"type": "RequestValidationError"} + ) + + @app.exception_handler(InvalidStateTransitionError) # type: ignore @app.exception_handler(ModelAlreadyExistsError) # type: ignore @app.exception_handler(ValueError) # type: ignore @@ -135,5 +147,11 @@ async def delete_model( return delete_handler(model_id=model_id) +@app.get(path="/metadata/instances") # type: ignore +async def get_instances() -> list[str]: + """Endpoint to list the EC2 instance types defined in botocore's EC2 service model.""" + return list(sess.get_service_model("ec2").shape_for("InstanceType").enum) + + handler = Mangum(app, lifespan="off", api_gateway_base_path="/models") docs = Mangum(app, lifespan="off") diff --git a/lib/api-base/authorizer.ts b/lib/api-base/authorizer.ts index 358b8b6c..b80c0035 100644 --- a/lib/api-base/authorizer.ts +++ b/lib/api-base/authorizer.ts @@ -16,15 +16,17 @@ import * as cdk from 'aws-cdk-lib'; import { RequestAuthorizer, IdentitySource } from 'aws-cdk-lib/aws-apigateway'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { IRole } from 'aws-cdk-lib/aws-iam'; -import { Code, Function, LayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Code, Function, LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { StringParameter } from 'aws-cdk-lib/aws-ssm'; import { Construct } from 'constructs'; import { BaseProps } from '../schema'; import { createCdkId } from '../core/utils'; import { Secret } from 'aws-cdk-lib/aws-secretsmanager'; +import { Vpc } from '../networking/vpc'; +import { Queue } from 'aws-cdk-lib/aws-sqs'; /** * Properties for RestApiGateway Construct. @@ -33,10 +35,11 @@ import { Secret } from 'aws-cdk-lib/aws-secretsmanager'; * @property {Layer} authorizerLayer - Lambda layer for authorizer lambda.
* @property {IRole} role - Execution role for lambdas * @property {ISecurityGroup[]} securityGroups - Security groups for Lambdas + * @property {Map} importedSubnets for Lambdas */ type AuthorizerProps = { role?: IRole; - vpc?: IVpc; + vpc?: Vpc; securityGroups?: ISecurityGroup[]; } & BaseProps; @@ -73,10 +76,15 @@ export class CustomAuthorizer extends Construct { // Create Lambda authorizer const authorizerLambda = new Function(this, 'AuthorizerLambda', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'AuthorizerLambdaDLQ', { + queueName: 'AuthorizerLambdaDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'authorizer.lambda_functions.lambda_handler', functionName: `${cdk.Stack.of(this).stackName}-lambda-authorizer`, - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), description: 'REST API and UI Authorization Lambda', timeout: cdk.Duration.seconds(30), memorySize: 128, @@ -88,9 +96,11 @@ export class CustomAuthorizer extends Construct { JWT_GROUPS_PROP: config.authConfig!.jwtGroupsProperty, MANAGEMENT_KEY_NAME: managementKeySecretNameStringParameter.stringValue }, + reservedConcurrentExecutions: 20, role: role, - vpc: vpc, + vpc: vpc?.vpc, securityGroups: securityGroups, + vpcSubnets: vpc?.subnetSelection }); const managementKeySecret = Secret.fromSecretNameV2(this, createCdkId([id, 'managementKey']), managementKeySecretNameStringParameter.stringValue); diff --git a/lib/api-base/ecsCluster.ts b/lib/api-base/ecsCluster.ts index 69bbcf7c..7a9b381d 100644 --- a/lib/api-base/ecsCluster.ts +++ b/lib/api-base/ecsCluster.ts @@ -18,7 +18,7 @@ import { Duration, RemovalPolicy } from 'aws-cdk-lib'; import { BlockDeviceVolume, GroupMetrics, Monitoring } from 'aws-cdk-lib/aws-autoscaling'; import { Metric, Stats } from 'aws-cdk-lib/aws-cloudwatch'; -import { InstanceType, IVpc, SecurityGroup } from 'aws-cdk-lib/aws-ec2'; +import { InstanceType, 
SecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { Repository } from 'aws-cdk-lib/aws-ecr'; import { AmiHardwareType, @@ -45,6 +45,7 @@ import { Construct } from 'constructs'; import { createCdkId } from '../core/utils'; import { BaseProps, Ec2Metadata, EcsSourceType } from '../schema'; import { ECSConfig } from '../schema'; +import { Vpc } from '../networking/vpc'; /** * Properties for the ECSCluster Construct. @@ -56,7 +57,7 @@ import { ECSConfig } from '../schema'; type ECSClusterProps = { ecsConfig: ECSConfig; securityGroup: SecurityGroup; - vpc: IVpc; + vpc: Vpc; } & BaseProps; /** @@ -84,12 +85,13 @@ export class ECSCluster extends Construct { // Create ECS cluster const cluster = new Cluster(this, createCdkId(['Cl']), { clusterName: createCdkId([config.deploymentName, ecsConfig.identifier], 32, 2), - vpc: vpc, + vpc: vpc.vpc, containerInsights: !config.region.includes('iso'), }); // Create auto scaling group const autoScalingGroup = cluster.addCapacity(createCdkId(['ASG']), { + vpcSubnets: vpc.subnetSelection, instanceType: new InstanceType(ecsConfig.instanceType), machineImage: EcsOptimizedImage.amazonLinux2(ecsConfig.amiHardwareType), minCapacity: ecsConfig.autoScalingConfig.minCapacity, @@ -102,7 +104,7 @@ export class ECSCluster extends Construct { blockDevices: [ { deviceName: '/dev/xvda', - volume: BlockDeviceVolume.ebs(30, { + volume: BlockDeviceVolume.ebs(ecsConfig.autoScalingConfig.blockDeviceVolumeSize, { encrypted: true, }), }, @@ -265,7 +267,8 @@ export class ECSCluster extends Construct { loadBalancerName: createCdkId([config.deploymentName, ecsConfig.identifier], 32, 2).toLowerCase(), dropInvalidHeaderFields: true, securityGroup, - vpc, + vpc: vpc.vpc, + vpcSubnets: vpc.subnetSelection, idleTimeout: Duration.seconds(600) }); diff --git a/lib/api-base/fastApiContainer.ts b/lib/api-base/fastApiContainer.ts index d90f0e9d..2f4ca3a6 100644 --- a/lib/api-base/fastApiContainer.ts +++ b/lib/api-base/fastApiContainer.ts @@ -16,14 +16,15 @@ import { 
CfnOutput } from 'aws-cdk-lib'; import { ITable } from 'aws-cdk-lib/aws-dynamodb'; -import { IVpc, SecurityGroup } from 'aws-cdk-lib/aws-ec2'; +import { SecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { AmiHardwareType, ContainerDefinition } from 'aws-cdk-lib/aws-ecs'; import { IRole } from 'aws-cdk-lib/aws-iam'; import { Construct } from 'constructs'; import { dump as yamlDump } from 'js-yaml'; import { ECSCluster } from './ecsCluster'; -import { BaseProps, Ec2Metadata, EcsSourceType, FastApiContainerConfig } from '../schema'; +import { BaseProps, Ec2Metadata, EcsSourceType } from '../schema'; +import { Vpc } from '../networking/vpc'; // This is the amount of memory to buffer (or subtract off) from the total instance memory, if we don't include this, // the container can have a hard time finding available RAM resources to start and the tasks will fail deployment @@ -39,9 +40,8 @@ type FastApiContainerProps = { apiName: string; resourcePath: string; securityGroup: SecurityGroup; - taskConfig: FastApiContainerConfig; tokenTable: ITable | undefined; - vpc: IVpc; + vpc: Vpc; } & BaseProps; /** @@ -65,23 +65,20 @@ export class FastApiContainer extends Construct { constructor (scope: Construct, id: string, props: FastApiContainerProps) { super(scope, id); - const { config, securityGroup, taskConfig, tokenTable, vpc } = props; + const { config, securityGroup, tokenTable, vpc } = props; - let buildArgs: Record | undefined = undefined; - if (taskConfig.containerConfig.image.type === EcsSourceType.ASSET) { - buildArgs = { - BASE_IMAGE: taskConfig.containerConfig.image.baseImage, - PYPI_INDEX_URL: config.pypiConfig.indexUrl, - PYPI_TRUSTED_HOST: config.pypiConfig.trustedHost, - LITELLM_CONFIG: yamlDump(config.litellmConfig), - }; - } + const buildArgs: Record | undefined = { + BASE_IMAGE: 'python:3.10', + PYPI_INDEX_URL: config.pypiConfig.indexUrl, + PYPI_TRUSTED_HOST: config.pypiConfig.trustedHost, + LITELLM_CONFIG: yamlDump(config.litellmConfig), + }; const environment: 
Record = { LOG_LEVEL: config.logLevel, AWS_REGION: config.region, AWS_REGION_NAME: config.region, // for supporting SageMaker endpoints in LiteLLM - THREADS: Ec2Metadata.get(taskConfig.instanceType).vCpus.toString(), - LITELLM_KEY: config.litellmConfig.general_settings.master_key, + THREADS: Ec2Metadata.get('m5.large').vCpus.toString(), + LITELLM_KEY: config.litellmConfig.db_key, }; if (config.restApiConfig.internetFacing) { @@ -102,18 +99,55 @@ export class FastApiContainer extends Construct { config, ecsConfig: { amiHardwareType: AmiHardwareType.STANDARD, - autoScalingConfig: taskConfig.autoScalingConfig, + autoScalingConfig: { + blockDeviceVolumeSize: 30, + minCapacity: 1, + maxCapacity: 1, + cooldown: 60, + defaultInstanceWarmup: 60, + metricConfig: { + AlbMetricName: 'RequestCountPerTarget', + targetValue: 1000, + duration: 60, + estimatedInstanceWarmup: 30 + } + }, buildArgs, - containerConfig: taskConfig.containerConfig, + containerConfig: { + image: { + baseImage: 'python:3.10', + path: 'lib/serve/rest-api', + type: EcsSourceType.ASSET + }, + healthCheckConfig: { + command: ['CMD-SHELL', 'exit 0'], + interval: 10, + startPeriod: 30, + timeout: 5, + retries: 3 + }, + environment: {}, + sharedMemorySize: 0 + }, containerMemoryBuffer: CONTAINER_MEMORY_BUFFER, environment, identifier: props.apiName, - instanceType: taskConfig.instanceType, + instanceType: 'm5.large', internetFacing: config.restApiConfig.internetFacing, - loadBalancerConfig: taskConfig.loadBalancerConfig, + loadBalancerConfig: { + healthCheckConfig: { + path: '/health', + interval: 60, + timeout: 30, + healthyThresholdCount: 2, + unhealthyThresholdCount: 10 + }, + domainName: config.restApiConfig.domainName, + sslCertIamArn: config.restApiConfig?.sslCertIamArn ?? 
null, + }, }, securityGroup, - vpc, + vpc }); if (tokenTable) { diff --git a/lib/api-base/utils.ts b/lib/api-base/utils.ts index 095ce633..2422742d 100644 --- a/lib/api-base/utils.ts +++ b/lib/api-base/utils.ts @@ -34,10 +34,12 @@ import { IRestApi, Cors, } from 'aws-cdk-lib/aws-apigateway'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { IRole } from 'aws-cdk-lib/aws-iam'; import { Code, Function, Runtime, ILayerVersion, IFunction, CfnPermission } from 'aws-cdk-lib/aws-lambda'; import { Construct } from 'constructs'; +import { Vpc } from '../networking/vpc'; +import { Queue } from 'aws-cdk-lib/aws-sqs'; /** * Type representing python lambda function @@ -81,7 +83,7 @@ export function registerAPIEndpoint ( funcDef: PythonLambdaFunction, pythonRuntime: Runtime, role?: IRole, - vpc?: IVpc, + vpc?: Vpc, securityGroups?: ISecurityGroup[], ): IFunction { const functionId = `${ @@ -104,6 +106,11 @@ export function registerAPIEndpoint ( }); } else { handler = new Function(scope, functionId, { + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(scope, `${functionId}DLQ`, { + queueName: `${functionId}DLQ`, + enforceSSL: true, + }), functionName: functionId, runtime: pythonRuntime, handler: `${funcDef.resource}.lambda_functions.${funcDef.name}`, @@ -115,9 +122,11 @@ export function registerAPIEndpoint ( timeout: funcDef.timeout || Duration.seconds(180), memorySize: 512, layers, + reservedConcurrentExecutions: 20, role, - vpc, + vpc: vpc?.vpc, securityGroups, + vpcSubnets: vpc?.subnetSelection, }); } diff --git a/lib/chat/api/configuration.ts b/lib/chat/api/configuration.ts new file mode 100644 index 00000000..f0a29cef --- /dev/null +++ b/lib/chat/api/configuration.ts @@ -0,0 +1,173 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +import { IAuthorizer, RestApi } from 'aws-cdk-lib/aws-apigateway'; +import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; +import { Role } from 'aws-cdk-lib/aws-iam'; +import { LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { StringParameter } from 'aws-cdk-lib/aws-ssm'; +import { Construct } from 'constructs'; + +import { PythonLambdaFunction, registerAPIEndpoint } from '../../api-base/utils'; +import { BaseProps } from '../../schema'; +import { createLambdaRole } from '../../core/utils'; +import { Vpc } from '../../networking/vpc'; +import { AwsCustomResource, PhysicalResourceId } from 'aws-cdk-lib/custom-resources'; + +/** + * Properties for ConfigurationApi Construct. + * + * @property {IVpc} vpc - Stack VPC + * @property {Layer} commonLayer - Lambda layer for all Lambdas. + * @property {IRestApi} restAPI - REST APIGW for UI and Lambdas + * @property {IRole} lambdaExecutionRole - Execution role for lambdas + * @property {IAuthorizer} authorizer - APIGW authorizer + * @property {ISecurityGroup[]} securityGroups - Security groups for Lambdas + * @property {Map }importedSubnets for application. 
+ */ +type ConfigurationApiProps = { + authorizer: IAuthorizer; + restApiId: string; + rootResourceId: string; + securityGroups?: ISecurityGroup[]; + vpc?: Vpc; +} & BaseProps; + +/** + * API which Maintains config state in DynamoDB + */ +export class ConfigurationApi extends Construct { + constructor (scope: Construct, id: string, props: ConfigurationApiProps) { + super(scope, id); + + const { authorizer, config, restApiId, rootResourceId, securityGroups, vpc } = props; + + // Get common layer based on arn from SSM due to issues with cross stack references + const commonLambdaLayer = LayerVersion.fromLayerVersionArn( + this, + 'configuration-common-lambda-layer', + StringParameter.valueForStringParameter(this, `${config.deploymentPrefix}/layerVersion/common`), + ); + + // Create DynamoDB table to handle config data + const configTable = new dynamodb.Table(this, 'ConfigurationTable', { + partitionKey: { + name: 'configScope', + type: dynamodb.AttributeType.STRING, + }, + sortKey: { + name: 'versionId', + type: dynamodb.AttributeType.NUMBER, + }, + billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, + encryption: dynamodb.TableEncryption.AWS_MANAGED, + removalPolicy: config.removalPolicy, + }); + + const lambdaRole: Role = createLambdaRole(this, config.deploymentName, 'ConfigurationApi', configTable.tableArn); + + // Populate the App Config table with default config + const date = new Date(); + new AwsCustomResource(this, 'lisa-init-ddb-config', { + onCreate: { + service: 'DynamoDB', + action: 'putItem', + physicalResourceId: PhysicalResourceId.of('initConfigData'), + parameters: { + TableName: configTable.tableName, + Item: { + 'versionId': {'N': '0'}, + 'changedBy': {'S': 'System'}, + 'configScope': {'S': 'global'}, + 'changeReason': {'S': 'Initial deployment default config'}, + 'createdAt': {'S': Math.round(date.getTime() / 1000).toString()}, + 'configuration': {'M': { + 'enabledComponents': {'M': { + 'deleteSessionHistory': {'BOOL': 'True'}, + 'viewMetaData': 
{'BOOL': 'True'}, + 'editKwargs': {'BOOL': 'True'}, + 'editPromptTemplate': {'BOOL': 'True'}, + 'editChatHistoryBuffer': {'BOOL': 'True'}, + 'editNumOfRagDocument': {'BOOL': 'True'}, + 'uploadRagDocs': {'BOOL': 'True'}, + 'uploadContextDocs': {'BOOL': 'True'} + }}, + 'systemBanner': {'M': { + 'isEnabled': {'BOOL': 'False'}, + 'text': {'S': ''}, + 'textColor': {'S': ''}, + 'backgroundColor': {'S': ''} + }} + }} + }, + }, + }, + role: lambdaRole + }); + + const restApi = RestApi.fromRestApiAttributes(this, 'RestApi', { + restApiId: restApiId, + rootResourceId: rootResourceId, + }); + + // Create API Lambda functions + const apis: PythonLambdaFunction[] = [ + { + name: 'get_configuration', + resource: 'configuration', + description: 'Get configuration', + path: 'configuration', + method: 'GET', + environment: { + CONFIG_TABLE_NAME: configTable.tableName + }, + }, + { + name: 'update_configuration', + resource: 'configuration', + description: 'Updates config data', + path: 'configuration/{configScope}', + method: 'PUT', + environment: { + CONFIG_TABLE_NAME: configTable.tableName, + }, + }, + ]; + + apis.forEach((f) => { + const lambdaFunction = registerAPIEndpoint( + this, + restApi, + authorizer, + './lambda', + [commonLambdaLayer], + f, + Runtime.PYTHON_3_10, + lambdaRole, + vpc, + securityGroups, + ); + if (f.method === 'POST' || f.method === 'PUT') { + configTable.grantWriteData(lambdaFunction); + } else if (f.method === 'GET') { + configTable.grantReadData(lambdaFunction); + } else if (f.method === 'DELETE') { + configTable.grantReadWriteData(lambdaFunction); + } + }); + } +} diff --git a/lib/chat/api/session.ts b/lib/chat/api/session.ts index 64d68df1..cd81b309 100644 --- a/lib/chat/api/session.ts +++ b/lib/chat/api/session.ts @@ -16,15 +16,16 @@ import { IAuthorizer, RestApi } from 'aws-cdk-lib/aws-apigateway'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 
'aws-cdk-lib/aws-ec2'; import { Role } from 'aws-cdk-lib/aws-iam'; -import { LayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { StringParameter } from 'aws-cdk-lib/aws-ssm'; import { Construct } from 'constructs'; import { PythonLambdaFunction, registerAPIEndpoint } from '../../api-base/utils'; import { BaseProps } from '../../schema'; import { createLambdaRole } from '../../core/utils'; +import { Vpc } from '../../networking/vpc'; /** * Properties for SessionApi Construct. @@ -35,13 +36,14 @@ import { createLambdaRole } from '../../core/utils'; * @property {IRole} lambdaExecutionRole - Execution role for lambdas * @property {IAuthorizer} authorizer - APIGW authorizer * @property {ISecurityGroup[]} securityGroups - Security groups for Lambdas + * @property {Map }importedSubnets for application. */ type SessionApiProps = { authorizer: IAuthorizer; restApiId: string; rootResourceId: string; securityGroups?: ISecurityGroup[]; - vpc?: IVpc; + vpc?: Vpc; } & BaseProps; /** @@ -151,10 +153,10 @@ export class SessionApi extends Construct { this, restApi, authorizer, - config.lambdaSourcePath, + './lambda', [commonLambdaLayer], f, - config.lambdaConfig.pythonRuntime, + Runtime.PYTHON_3_10, lambdaRole, vpc, securityGroups, diff --git a/lib/chat/index.ts b/lib/chat/index.ts index dfb8a11f..20e2d8e8 100644 --- a/lib/chat/index.ts +++ b/lib/chat/index.ts @@ -17,18 +17,20 @@ // LisaChat Stack. 
import { Stack, StackProps } from 'aws-cdk-lib'; import { IAuthorizer } from 'aws-cdk-lib/aws-apigateway'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { Construct } from 'constructs'; import { SessionApi } from './api/session'; import { BaseProps } from '../schema'; +import { Vpc } from '../networking/vpc'; +import { ConfigurationApi } from './api/configuration'; type CustomLisaChatStackProps = { authorizer: IAuthorizer; restApiId: string; rootResourceId: string; securityGroups?: ISecurityGroup[]; - vpc?: IVpc; + vpc?: Vpc; } & BaseProps; type LisaChatStackProps = CustomLisaChatStackProps & StackProps; @@ -55,5 +57,14 @@ export class LisaChatApplicationStack extends Stack { securityGroups, vpc, }); + + new ConfigurationApi(this, 'ConfigurationApi', { + authorizer, + config, + restApiId, + rootResourceId, + securityGroups, + vpc, + }); } } diff --git a/lib/core/api_base.ts b/lib/core/api_base.ts index c8de1bc0..18654357 100644 --- a/lib/core/api_base.ts +++ b/lib/core/api_base.ts @@ -15,15 +15,15 @@ */ import { Stack, StackProps } from 'aws-cdk-lib'; -import { Cors, EndpointType, Authorizer, RestApi, StageOptions } from 'aws-cdk-lib/aws-apigateway'; -import { IVpc } from 'aws-cdk-lib/aws-ec2'; +import { Authorizer, Cors, EndpointType, RestApi, StageOptions } from 'aws-cdk-lib/aws-apigateway'; import { Construct } from 'constructs'; import { CustomAuthorizer } from '../api-base/authorizer'; import { BaseProps } from '../schema'; +import { Vpc } from '../networking/vpc'; type LisaApiBaseStackProps = { - vpc: IVpc; + vpc: Vpc; } & BaseProps & StackProps; @@ -47,7 +47,7 @@ export class LisaApiBaseStack extends Stack { const restApi = new RestApi(this, `${id}-RestApi`, { description: 'Base API Gateway for LISA.', - endpointConfiguration: { types: [EndpointType.REGIONAL] }, + endpointConfiguration: { types: [config.privateEndpoints ? 
EndpointType.PRIVATE : EndpointType.REGIONAL] }, deploy: true, deployOptions, defaultCorsPreflightOptions: { diff --git a/lib/core/layers/index.ts b/lib/core/layers/index.ts index 82b3c171..adf1200f 100644 --- a/lib/core/layers/index.ts +++ b/lib/core/layers/index.ts @@ -15,7 +15,7 @@ */ import { BundlingOutput } from 'aws-cdk-lib'; -import { Architecture, Code, LayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Architecture, Code, LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { Asset } from 'aws-cdk-lib/aws-s3-assets'; import { Construct } from 'constructs'; @@ -84,7 +84,7 @@ export class Layer extends Construct { const layerAsset = new Asset(this, 'LayerAsset', { path, bundling: { - image: config.lambdaConfig.pythonRuntime.bundlingImage, + image: Runtime.PYTHON_3_10.bundlingImage, platform: architecture.dockerPlatform, command: ['bash', '-c', `set -e ${args.join(' ')}`], outputType: BundlingOutput.AUTO_DISCOVER, @@ -97,7 +97,7 @@ export class Layer extends Construct { const layer = new LayerVersion(this, 'Layer', { code: layerCode, - compatibleRuntimes: [config.lambdaConfig.pythonRuntime], + compatibleRuntimes: [Runtime.PYTHON_3_10], removalPolicy: config.removalPolicy, description: description, }); diff --git a/lib/docs/.gitignore b/lib/docs/.gitignore new file mode 100644 index 00000000..99d9f534 --- /dev/null +++ b/lib/docs/.gitignore @@ -0,0 +1,3 @@ +dist/ +.vitepress/cache/ +/config/schema.md diff --git a/lib/docs/.vitepress/config.mts b/lib/docs/.vitepress/config.mts new file mode 100644 index 00000000..8f69311b --- /dev/null +++ b/lib/docs/.vitepress/config.mts @@ -0,0 +1,86 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +import { defineConfig } from 'vitepress'; + +const navLinks = [ + { + text: 'System Administrator Guide', + items: [ + { text: 'What is LISA?', link: '/admin/overview' }, + { text: 'Architecture Overview', link: '/admin/architecture' }, + { text: 'Getting Started', link: '/admin/getting-started' }, + { text: 'Deployment', link: '/admin/deploy' }, + { text: 'Model Management API Usage', link: '/admin/model-management' }, + { text: 'Chat UI Configuration', link: '/admin/ui-configuration' }, + { text: 'API Request Error Handling', link: '/admin/error' }, + ], + }, + { + text: 'Advanced Configuration', + items: [ + { text: 'Configuration Schema', link: '/config/configuration' }, + { text: 'Model Compatibility', link: '/config/model-compatibility' }, + { text: 'Rag Vector Stores', link: '/config/vector-stores' }, + { text: 'Configure IdP: Cognito & Keycloak Examples', link: '/config/idp' }, + { text: 'LiteLLM', link: '/config/lite-llm' }, + ], + }, + { + text: 'User Guide', + items: [ + { text: 'LISA Chat UI', link: '/user/chat' }, + { text: 'RAG', link: '/user/rag' }, + { text: 'Context Windows', link: '/user/context-windows' }, + { text: 'Model KWARGS', link: '/user/model-kwargs' }, + { text: 'Model Management UI', link: '/user/model-management-ui' }, + { text: 'Non-RAG in Context File Management', link: '/user/nonrag-management' }, + { text: 'Prompt Engineering', link: '/user/prompt-engineering' }, + { text: 'Session History', link: '/user/history' }, + { text: 'Breaking Changes', link: '/user/breaking-changes' }, + { text: 'Change Log', link: 
'https://github.com/awslabs/LISA/releases' }, + ], + }]; + +// https://vitepress.dev/reference/site-config +export default defineConfig({ + lang: 'en-US', + title: 'LISA Documentation', + description: 'LLM Inference Solution for Amazon Dedicated Cloud (LISA)', + outDir: 'dist', + base: '/LISA/', + head: [['link', { rel: 'icon', href: '/LISA/favicon.ico' }]], + // https://vitepress.dev/reference/default-theme-config + themeConfig: { + logo: { + light: '/logo-light.svg', + dark: '/logo-dark.svg', + }, + nav: [ + { text: 'Home', link: '/' }, + ...navLinks, + ], + + sidebar: navLinks, + + socialLinks: [ + { icon: 'github', link: 'https://github.com/awslabs/LISA' }, + ], + search: { + provider: 'local', + }, + }, +}); diff --git a/lib/docs/admin/api-tokens.md b/lib/docs/admin/api-tokens.md new file mode 100644 index 00000000..1a0afa75 --- /dev/null +++ b/lib/docs/admin/api-tokens.md @@ -0,0 +1,77 @@ +## Programmatic API Tokens + +The LISA Serve ALB can be used for programmatic access outside the example Chat application. +An example use case would be for allowing LISA to serve LLM requests that originate from the [Continue VSCode Plugin](https://www.continue.dev/). +To facilitate communication directly with the LISA Serve ALB, a user with sufficient DynamoDB PutItem permissions may add +API keys to the APITokenTable, and once created, a user may make requests by including the `Authorization: Bearer ${token}` +header or the `Api-Key: ${token}` header with that token. If using any OpenAI-compatible library, the `api_key` fields +will use the `Authorization: Bearer ${token}` format automatically, so there is no need to include additional headers +when using those libraries. + +### Adding a Token + +An account owner may create a long-lived API Token using the following AWS CLI command. 
+ +```bash +AWS_REGION="us-east-1" # change to your deployment region +token_string="YOUR_STRING_HERE" # change to a unique string for a user +aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ + --item '{"token": {"S": "'${token_string}'"}}' +``` + +If an account owner wants the API Token to be temporary and expire after a specific date, LISA will allow for this too. +In addition to the `token` field, the owner may specify the `tokenExpiration` field, which accepts a UNIX timestamp, +in seconds. The following command shows an example of how to do this. + +```bash +AWS_REGION="us-east-1" # change to your deployment region +token_string="YOUR_STRING_HERE" +token_expiration=$(echo $(date +%s) + 3600 | bc) # token that expires in one hour, 3600 seconds +aws --region $AWS_REGION dynamodb put-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ + --item '{ + "token": {"S": "'${token_string}'"}, + "tokenExpiration": {"N": "'${token_expiration}'"} + }' +``` + +Once the token is inserted into the DynamoDB Table, a user may use the token in the `Authorization` request header like +in the following snippet. + +```bash +lisa_serve_rest_url="https://" +token_string="YOUR_STRING_HERE" +curl ${lisa_serve_rest_url}/v2/serve/models \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${token_string}" +``` + +### Updating a Token + +In the case that an owner wishes to change an existing expiration time or add one to a key that did not previously have +an expiration, this can be accomplished by editing the existing item. The following commands can be used as an example +for updating an existing token. Setting the expiration time to a time in the past will effectively remove access for +that key. 
+ +```bash +AWS_REGION="us-east-1" # change to your deployment region +token_string="YOUR_STRING_HERE" +token_expiration=$(echo $(date +%s) + 600 | bc) # token that expires in 10 minutes from now +aws --region $AWS_REGION dynamodb update-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ + --key '{"token": {"S": "'${token_string}'"}}' \ + --update-expression 'SET tokenExpiration=:t' \ + --expression-attribute-values '{":t": {"N": "'${token_expiration}'"}}' +``` + +### Removing a Token + +Tokens will not be automatically removed even if they are no longer valid. An owner may remove a key, expired or not, +from the database to fully revoke the key, by deleting the item. As an example, the following commands can be used to +remove a token. + +```bash +AWS_REGION="us-east-1" # change to your deployment region +token_string="YOUR_STRING_HERE" # change to the token to remove +aws --region $AWS_REGION dynamodb delete-item --table-name $DEPLOYMENT_NAME-LISAApiTokenTable \ + --key '{"token": {"S": "'${token_string}'"}}' +``` diff --git a/lib/docs/admin/architecture.md b/lib/docs/admin/architecture.md new file mode 100644 index 00000000..92b4326b --- /dev/null +++ b/lib/docs/admin/architecture.md @@ -0,0 +1,68 @@ +# Architecture Overview + +LISA’s major components include: LISA Serve, LISA Chat API, LISA Chatbot, LISA RAG, and LISA Model Management. + +**Key Solution Features:** + +* **Model Hosting**, LISA Serve hosts your models in managed and scalable ECS Clusters. +* **Model Management**, LISA has APIs around deploying, updating, and deleting third party and internally hosted models deployed in your account. +* **Inference Requests**, interact with your models via exposed REST APIs or through the LISA Chatbot UI. +* **Chatbot UI** allows users to seamlessly interact with Models, Model Management, RAG, and Configuration APIs. 
+* **Retrieval-Augmented Generation (RAG) Operations**, leveraging OpenSearch and/or PGVector for efficient retrieval of relevant external data to enhance model responses. +* **Authentication and Authorization**, LISA supports customers bringing their own OpenID IDP and the use of DynamoDB stored Tokens to interact with the exposed APIs. + +### Solution Architecture + +![LISA Architecture](../assets/LisaArchitecture.png) +- **User Interaction with Chat UI or API:** Users can interact with LISA through the Chat UI or REST API. Each interaction is authenticated using AWS Cognito or OIDC, ensuring secure access. +- **Request Routing:** The API Gateway securely routes user requests to the appropriate backend services, whether for fetching the chat UI, performing RAG operations, or managing models. +- **Model Management:** Administrators can deploy, update, or delete models via the Model Management API, which triggers ECS deployment and scaling workflows. +- **Model Inference:** Inference requests are routed to ECS-hosted models or external models via the LiteLLM proxy. Responses are served back to users through the ALB. +- **RAG Integration:** When RAG is enabled, LISA retrieves relevant documents from OpenSearch or PGVector, augmenting the model's response with external knowledge. +- **Session Continuity:** User session data is stored in DynamoDB, ensuring that users can retrieve and continue previous conversations across multiple interactions. +- **Autoscaling:** ECS tasks automatically scale based on system load, with ALBs distributing traffic across available instances to ensure performance. + +## LISA Components + +### LISA Model Management +![LISA Model Management Architecture](../assets/LisaModelManagement.png) +The Model Management component is responsible for managing the entire lifecycle of models in LISA. This includes creating, updating, and deleting models, whether they are deployed on ECS or provided by a third party. 
The service integration automates and scales these operations, ensuring that the underlying infrastructure is managed efficiently. + +* **Self-Hosted Models**: Models are containerized and deployed on AWS ECS, with each model hosted in its own isolated ECS task. This design allows models to be independently scaled based on demand. Traffic to the models is balanced using Application Load Balancers (ALBs), ensuring that the autoscaling mechanism reacts to load fluctuations in real time, optimizing both performance and availability. +* **External Model Routing**: LISA utilizes the LiteLLM proxy to route traffic to different model providers, no matter their API and payload format. Users may add models from external providers, such as SageMaker or Bedrock, to LISA. LISA will simply add the configuration to LiteLLM without creating any additional supporting infrastructure. Customers do not have to independently manage the API integration with the use of LiteLLM. +* **Model Lifecycle Management**: AWS Step Functions are used to orchestrate the lifecycle of models, handling the creation, update, and deletion workflows. Each workflow provisions the required resources using CloudFormation templates, which manage infrastructure components like EC2 instances, security groups, and ECS services. LISA ensures that the necessary security, networking, and infrastructure components are automatically deployed and configured. + * The CloudFormation stacks define essential resources using the LISA core VPC configuration, ensuring best practices for security and access across all resources in the environment. + * DynamoDB stores model metadata, while Amazon S3 securely manages model weights, enabling ECS instances to retrieve the weights dynamically during deployment. + +#### Technical Implementation + +* **Model Lifecycle**: Lifecycle operations such as creation, update, and deletion are executed by Step Functions and backed by AWS Lambda in ```lambda/models/lambda_functions.py```. 
+* **CloudFormation**: Infrastructure components are provisioned using CloudFormation templates, as defined in ```ecs_model_deployer/src/lib/lisa_model_stack.ts```. +* **ECS Cluster**: ECS cluster and task definitions are located in ```ecs_model_deployer/src/lib/ecsCluster.ts```, with model containers specified in ```ecs_model_deployer/src/lib/ecs-model.ts```. + + +### LISA Serve +![LISA Serve Architecture](../assets/LisaServe.png) +LISA Serve is responsible for processing inference requests and serving model predictions. This component manages user requests to interact with LLMs and ensures that the models deliver low-latency responses. + +* **Inference Requests**: Requests are routed via ALB, which serves as the main entry point to LISA’s backend infrastructure. The ALB forwards requests to the appropriate ECS-hosted model or externally-hosted model based on the request parameters. For models hosted within LISA, traffic to the models is managed with model-specific ALBs, which enable autoscaling if the models are under heavy load. LISA supports both direct REST API-based interaction and interaction through the Chat UI, enabling programmatic access or a user-friendly chat experience. +* **RAG (Retrieval-Augmented Generation)**: RAG operations enhance model responses by integrating external data sources. LISA leverages OpenSearch or PGVector (PostgreSQL) as vector stores, enabling vector-based search and retrieval of relevant knowledge to augment LLM outputs dynamically. + +#### Technical Implementation + +* RAG operations are managed through ```lambda/rag/lambda_functions.py```, which handles embedding generation and document retrieval via OpenSearch and PostgreSQL. +* Direct requests to the LISA Serve ALB entrypoint must utilize the OpenAI API spec, which we support through the use of the LiteLLM proxy. 
+ + +### LISA Chat +![LISA Chatbot Architecture](../assets/LisaChat.png) +LISA Chat provides a customizable chat interface that enables users to interact with models in real-time. This component ensures that users have a seamless experience for submitting queries and maintaining session continuity. + +* **Chat Interface**: The Chat UI is hosted as a static website on Amazon S3 and is served via API Gateway. Users can interact with models directly through the web-based frontend, sending queries and viewing real-time responses from the models. The interface is integrated with LISA's backend services for model inference, retrieval augmented generation, and session management. +* **Session History Management**: LISA maintains session histories using DynamoDB, allowing users to retrieve and continue previous conversations seamlessly. This feature is crucial for maintaining continuity in multi-turn conversations with the models. + +#### Technical Implementation + +* The Chat UI is implemented in the ```lib/user-interface/react/``` folder and is deployed using the scripts in the ```scripts/``` folder. +* Session management logic is handled in ```lambda/session/lambda_functions.py```, where session data is stored and retrieved from DynamoDB. +* RAG operations are defined in lambda/repository/lambda_functions.py diff --git a/lib/docs/admin/deploy.md b/lib/docs/admin/deploy.md new file mode 100644 index 00000000..bf44b04e --- /dev/null +++ b/lib/docs/admin/deploy.md @@ -0,0 +1,70 @@ + +# Deployment +## Using pre-built resources + +A default configuration will build the necessary containers, lambda layers, and production optimized +web application at build time. In the event that you would like to use pre-built resources due to +network connectivity reasons or other concerns with the environment where you'll be deploying LISA +you can do so. + +- For ECS containers (Models, APIs, etc) you can modify the `containerConfig` block of + the corresponding entry in `config.yaml`. 
For container images you can provide a path to a directory + from which a docker container will be built (default), a path to a tarball, an ECR repository arn and + optional tag, or a public registry path. + - We provide immediate support for HuggingFace TGI and TEI containers and for vLLM containers. The `example_config.yaml` + file provides examples for TGI and TEI, and the only difference for using vLLM is to change the + `inferenceContainer`, `baseImage`, and `path` options, as indicated in the snippet below. All other options can + remain the same as the model definition examples we have for the TGI or TEI models. vLLM can also support embedding + models in this way, so all you need to do is refer to the embedding model artifacts and remove the `streaming` field + to deploy the embedding model. + - vLLM has support for the OpenAI Embeddings API, but model support for it is limited because the feature is new. Currently, + the only supported embedding model with vLLM is [intfloat/e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct), + but this list is expected to grow over time as vLLM updates. + ```yaml + ecsModels: + - modelName: your-model-name + inferenceContainer: tgi + baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 + ``` +- If you are deploying the LISA Chat User Interface you can optionally specify the path to the pre-built + website assets using the top level `webAppAssetsPath` parameter in `config.yaml`. Specifying this path + (typically `lib/user-interface/react/dist`) will avoid using a container to build and bundle the assets + at CDK build time. 
+- For the lambda layers you can specify the path to a local zip archive of the layer code by including + the optional `lambdaLayerAssets` block in `config.yaml` similar to the following: + +``` +lambdaLayerAssets: + authorizerLayerPath: lib/core/layers/authorizer_layer.zip + commonLayerPath: lib/core/layers/common_layer.zip + fastapiLayerPath: /path/to/fastapi_layer.zip + sdkLayerPath: lib/rag/layers/sdk_layer.zip +``` + +## Deploying + +Now that we have everything set up, we are ready to deploy. + +```bash +make deploy +``` + +By default, all stacks will be deployed but a particular stack can be deployed by providing the `STACK` argument to the `deploy` target. + +```bash +make deploy STACK=LisaServe +``` + +Available stacks can be listed by running: + +```bash +make listStacks +``` + +After the `deploy` command is run, you should see many docker build outputs and eventually a CDK progress bar. The deployment should take about 10-15 minutes and will produce a single CloudFormation output for the websocket URL. + +You can test the deployment with the integration test: + +```bash +pytest lisa-sdk/tests --url <rest-url-from-cdk-output> --verify <path-to-server.crt> | false +``` diff --git a/lib/docs/admin/error.md b/lib/docs/admin/error.md new file mode 100644 index 00000000..81b9fb78 --- /dev/null +++ b/lib/docs/admin/error.md @@ -0,0 +1,69 @@ + +# Error Handling for API Requests + +In the LISA model management API, error handling is designed to ensure robustness and consistent responses when errors occur during the execution of API requests. This section provides a detailed explanation of the error handling mechanisms in place, including the types of errors that are managed, how they are raised, and what kind of responses clients can expect when these errors occur. + +## Common Errors and Their HTTP Responses + +Below is a list of common errors that can occur in the system, along with the HTTP status codes and response structures that are returned to the client. 
+ +### ModelNotFoundError + +* **Description**: Raised when a model that is requested for retrieval or deletion is not found in the system. +* **HTTP Status Code**: `404 Not Found` +* **Response Body**: + +```json +{ + "error": "ModelNotFoundError", + "message": "The requested model with ID could not be found." +} +``` + +* **Example Scenario**: When a client attempts to fetch details of a model that does not exist in the database, the `ModelNotFoundError` is raised. + +### ModelAlreadyExistsError + +* **Description:** Raised when a request to create a model is made, but the model already exists in the system. +* **HTTP Status Code**: `400` +* **Response Body**: + +```json +{ + "error": "ModelAlreadyExistsError", + "message": "A model with the given configuration already exists." +} +``` + +* **Example Scenario:** A client attempts to create a model with an ID or name that already exists in the database. The system detects the conflict and raises the `ModelAlreadyExistsError`. + +### InvalidInputError (Hypothetical Example) + +* **Description**: Raised when the input provided by the client for creating or updating a model is invalid or does not conform to expected formats. +* **HTTP Status Code**: `400 Bad Request` +* **Response Body**: + +```json +{ + "error": "InvalidInputError", + "message": "The input provided is invalid. Please check the required fields and formats." +} +``` + +* **Example Scenario**: The client submits a malformed JSON body or omits required fields in a model creation request, triggering an `InvalidInputError`. + +## Handling Validation Errors + +Validation errors are handled across the API via utility functions and model transformation logic. These errors typically occur when user inputs fail validation checks or when required data is missing from a request. 
+ +### Example Response for Validation Error: + +* **HTTP Status Code**: `422 Unprocessable Entity` +* **Response Body**: + +```json +{ + "error": "ValidationError", + "message": "The input provided does not meet the required validation criteria." +} +``` diff --git a/lib/docs/admin/getting-started.md b/lib/docs/admin/getting-started.md new file mode 100644 index 00000000..1c828188 --- /dev/null +++ b/lib/docs/admin/getting-started.md @@ -0,0 +1,233 @@ + +# Getting Started with LISA + +LISA is an infrastructure-as-code solution that leverages AWS services. Customers deploy LISA directly into an AWS account. + +## Deployment Prerequisites + +### Pre-Deployment Steps + +* Set up and have access to an AWS account with appropriate permissions + * All the resource creation that happens as part of CDK deployments expects Administrator or Administrator-like permissions with resource creation and mutation permissions. Installation will not succeed if this profile does not have permissions to create and edit arbitrary resources for the system. Note: This level of permissions is not required for the runtime of LISA. This is only necessary for deployment and subsequent updates. +* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles +* Optional: If using the chat UI, have your Identity Provider (IdP) information and access +* Optional: Have your VPC information available, if you are using an existing one for your deployment +* Note: CDK and Model Management both leverage AWS Systems Manager (SSM) Parameter Store. Confirm that SSM is approved for use by your organization before beginning. 
+ +### Software + +* AWS CLI installed and configured +* Python 3.9 or later +* Node.js 14 or later +* Docker installed and running +* Sufficient disk space for model downloads and conversions + + +If you're new to CDK, review the [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/v2/guide/getting_started.html) and consult with your AWS support team. + +> [!TIP] +> To minimize version conflicts and ensure a consistent deployment environment, it is recommended to execute the following steps on a dedicated EC2 instance. However, LISA can be deployed from any machine that meets the prerequisites listed above. + +## Step 1: Clone the Repository + +Ensure you're working with the latest stable release of LISA: + +```bash +git clone -b main --single-branch <repository-url> +cd lisa +``` + +## Step 2: Set Up Environment Variables + +Create and configure your `config-custom.yaml` file: + +```bash +cp example_config.yaml config-custom.yaml +``` + +Set the following environment variables: + +```bash +export PROFILE=my-aws-profile # Optional, can be left blank +export DEPLOYMENT_NAME=my-deployment +export ENV=dev # Options: dev, test, or prod +export CDK_DOCKER=finch # Optional, only required if not using docker as container engine +``` + +## Step 3: Set Up Python and TypeScript Environments + +Install system dependencies and set up both Python and TypeScript environments: + +```bash +# Install system dependencies +sudo apt-get update +sudo apt-get install -y jq + +# Install Python packages +pip3 install --user --upgrade pip +pip3 install yq huggingface_hub s5cmd + +# Set up Python environment +make createPythonEnvironment + +# Activate your python environment +# (The command is the output from the previous make command) + +# Install Python Requirements +make installPythonRequirements + +# Set up TypeScript environment +make createTypeScriptEnvironment +make installTypeScriptRequirements +``` + +## Step 4: Configure LISA + +Edit the `config-custom.yaml` file to customize your LISA 
deployment. Key configurations include: + +- AWS account and region settings +- Authentication settings +- Model bucket name + +## Step 5: Stage Model Weights + +LISA requires model weights to be staged in the S3 bucket specified in your `config-custom.yaml` file, assuming the S3 bucket follows this structure: + +``` +s3://<bucket-name>/<hf-model-id-1> +s3://<bucket-name>/<hf-model-id-1>/<file-1> +s3://<bucket-name>/<hf-model-id-1>/<file-2> +... +s3://<bucket-name>/<hf-model-id-2> +``` + +**Example:** + +``` +s3://<bucket-name>/mistralai/Mistral-7B-Instruct-v0.2 +s3://<bucket-name>/mistralai/Mistral-7B-Instruct-v0.2/<file-1> +s3://<bucket-name>/mistralai/Mistral-7B-Instruct-v0.2/<file-2> +... +``` + +To automatically download and stage the model weights defined by the `ecsModels` parameter in your `config-custom.yaml`, use the following command: + +```bash +make modelCheck +``` + +This command verifies whether the model's weights are already present in your S3 bucket. If not, it downloads the weights, converts them to the required format, and uploads them to your S3 bucket. Ensure adequate disk space is available for this process. + +> **WARNING** +> As of LISA 3.0, the `ecsModels` parameter in `config-custom.yaml` is solely for staging model weights in your S3 bucket. +> Previously, before models could be managed through the [API](/admin/model-management) or via the Model Management +> section of the [Chatbot](/user/chat), this parameter also +> dictated which models were deployed. + +> **NOTE** +> For air-gapped systems, before running `make modelCheck` you should manually download model artifacts and place them in a `models` directory at the project root, using the structure: `models/<model-id>`. + +> **NOTE** +> This process is primarily designed and tested for HuggingFace models. For other model formats, you will need to manually create and upload safetensors. + +## Step 6: Configure Identity Provider + +In the `config-custom.yaml` file, configure the `authConfig` block for authentication. LISA supports OpenID Connect (OIDC) providers such as AWS Cognito or Keycloak. 
Required fields include: + +- `authority`: URL of your identity provider +- `clientId`: Client ID for your application +- `adminGroup`: Group name for users with model management permissions +- `jwtGroupsProperty`: Path to the groups field in the JWT token +- `additionalScopes` (optional): Extra scopes for group membership information + +IDP Configuration examples using AWS Cognito and Keycloak can be found here: [IDP Configuration Examples](/config/idp) + + +## Step 7: Configure LiteLLM +We utilize LiteLLM under the hood to allow LISA to respond to the [OpenAI specification](https://platform.openai.com/docs/api-reference). +For LiteLLM configuration, a key must be set up so that the system may communicate with a database for tracking all the models that are added or removed +using the [Model Management API](/admin/model-management). The key must start with `sk-` and then can be any arbitrary +string. We recommend generating a new UUID and then using that as +the key. Configuration example is below. + + +```yaml +litellmConfig: + db_key: sk-00000000-0000-0000-0000-000000000000 # needed for db operations, create your own key # pragma: allowlist-secret +``` + +## Step 8: Set Up SSL Certificates (Development Only) + +**WARNING: THIS IS FOR DEV ONLY** +When deploying for dev and testing you can use a self-signed certificate for the REST API ALB. You can create this by using the script: `gen-certs.sh` and uploading it to `IAM`. + +```bash +export REGION=<your-region> +./scripts/gen-certs.sh +aws iam upload-server-certificate --server-certificate-name <cert-name> --certificate-body file://scripts/server.pem --private-key file://scripts/server.key +``` + +Update your `config-custom.yaml` with the certificate ARN: + +```yaml +restApiConfig: + sslCertIamArn: arn:aws:iam::<account-number>:server-certificate/<certificate-name> +``` + +## Step 9: Customize Model Deployment + +In the `ecsModels` section of `config-custom.yaml`, allow our deployment process to pull the model weights for you. 
+ +During the deployment process, LISA will attempt to download your model weights if you specify the optional `ecsModels` +array. This will only work in non-ADC regions. Specifically, see the `ecsModels` section of +the [example_config.yaml](https://github.com/awslabs/LISA/blob/develop/example_config.yaml) file. +Here we define the model name, inference container, and baseImage: + +```yaml +ecsModels: + - modelName: your-model-name + inferenceContainer: tgi + baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1 +``` + +## Step 10: Bootstrap CDK (If Not Already Done) + +If you haven't bootstrapped your AWS account for CDK: + +```bash +make bootstrap +``` + +## Recommended LiteLLM Configuration Options + +While LISA is designed to be flexible, configuring external models requires careful consideration. The following guide +provides a recommended minimal setup for integrating various model types with LISA using LiteLLM. + +### Configuration Overview + +This example configuration demonstrates how to set up: +1. A SageMaker Endpoint +2. An Amazon Bedrock Model +3. A self-hosted OpenAI-compatible text generation model +4. A self-hosted OpenAI-compatible embedding model + +**Note:** Ensure that all endpoints and models are in the same AWS region as your LISA installation. + +### SageMaker Endpoints and Bedrock Models + +LISA supports adding existing SageMaker Endpoints and Bedrock Models to the LiteLLM configuration. As long as these +services are in the same region as the LISA installation, LISA can use them alongside any other deployed models. + +**To use a SageMaker Endpoint:** +1. Install LISA without initially referencing the SageMaker Endpoint. +2. Create a SageMaker Model using the private subnets of the LISA deployment. +3. This setup allows the LISA REST API container to communicate with any Endpoint using that SageMaker Model. 
+ +**SageMaker Endpoints and Bedrock Models can be configured:** +- Statically at LISA deployment time +- Dynamically using the LISA Model Management API + +**Important:** Endpoints or Models statically defined during LISA deployment cannot be removed or updated using the +LISA Model Management API, and they will not show in the Chat UI. These will only show as part of the OpenAI `/models` API. +Although there is support for it, we recommend using the [Model Management API](/admin/model-management) instead of the +following static configuration. diff --git a/lib/docs/admin/model-management.md b/lib/docs/admin/model-management.md new file mode 100644 index 00000000..cb4b7ba9 --- /dev/null +++ b/lib/docs/admin/model-management.md @@ -0,0 +1,364 @@ + +# Model Management API Usage + +LISA provides robust API endpoints for managing models, both for users and administrators. These endpoints allow for operations such as listing, creating, updating, and deleting models. + +## API Gateway and ALB Endpoints + +LISA uses two primary APIs for model management: + +1. **User-facing OpenAI-Compatible API**: Available to all users for inference tasks and accessible through the LISA Serve ALB. This API provides an interface for querying and interacting with models deployed on Amazon ECS, Amazon Bedrock, or through LiteLLM. +2. **Admin-level Model Management API**: Available only to administrators through the API Gateway (APIGW). This API allows for full control of model lifecycle management, including creating, updating, and deleting models. + +### LiteLLM Routing in All Models + +Every model request is routed through LiteLLM, regardless of whether infrastructure (like ECS) is created for it. Whether deployed on ECS, external models via Bedrock, or managed through LiteLLM, all models are added to LiteLLM for traffic routing. The distinction is whether infrastructure is created (determined by request payloads), but LiteLLM integration is consistent for all models. 
The model management APIs will handle adding or removing model configurations from LiteLLM, and the LISA Serve endpoint will handle the inference requests against models available in LiteLLM. + +## User-facing OpenAI-Compatible API + +The OpenAI-compatible API is accessible through the LISA Serve ALB and allows users to list models available for inference tasks. Although not specifically part of the model management APIs, any model that is added or removed from LiteLLM via the model management API Gateway APIs will be reflected immediately upon queries to LiteLLM through the LISA Serve ALB. + +### Listing Models + +The `/v2/serve/models` endpoint on the LISA Serve ALB allows users to list all models available for inference in the LISA system. + +#### Request Example: + +```bash +curl -s -H 'Authorization: Bearer ' -X GET https:///v2/serve/models +``` + +#### Response Example: + +```json +{ + "data": [ + { + "id": "bedrock-embed-text-v2", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "titan-express-v1", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + }, + { + "id": "sagemaker-amazon-mistrallite", + "object": "model", + "created": 1677610602, + "owned_by": "openai" + } + ], + "object": "list" +} +``` + +#### Explanation of Response Fields: + +These fields are all defined by the OpenAI API specification, which is documented [here](https://platform.openai.com/docs/api-reference/models/list). + +- `id`: A unique identifier for the model. +- `object`: The type of object, which is "model" in this case. +- `created`: A Unix timestamp representing when the model was created. +- `owned_by`: The entity responsible for the model, such as "openai." + +## Admin-level Model Management API + +This API is only accessible by administrators via the API Gateway and is used to create, update, and delete models. It supports full model lifecycle management. 
+ +### Listing Models (Admin API) + +The `/models` route allows admins to list all models managed by the system. This includes models that are either creating, deleting, already active, or in a failed state. Models can be deployed via ECS or managed externally through a LiteLLM configuration. + +#### Request Example: + +```bash +curl -s -H "Authorization: Bearer " -X GET https:///models +``` + +#### Response Example: + +```json +{ + "models": [ + { + "autoScalingConfig": { + "minCapacity": 1, + "maxCapacity": 1, + "cooldown": 420, + "defaultInstanceWarmup": 180, + "metricConfig": { + "albMetricName": "RequestCountPerTarget", + "targetValue": 30, + "duration": 60, + "estimatedInstanceWarmup": 330 + } + }, + "containerConfig": { + "image": { + "baseImage": "vllm/vllm-openai:v0.5.0", + "type": "asset" + }, + "sharedMemorySize": 2048, + "healthCheckConfig": { + "command": [ + "CMD-SHELL", + "exit 0" + ], + "interval": 10, + "startPeriod": 30, + "timeout": 5, + "retries": 3 + }, + "environment": { + "MAX_TOTAL_TOKENS": "2048", + "MAX_CONCURRENT_REQUESTS": "128", + "MAX_INPUT_LENGTH": "1024" + } + }, + "loadBalancerConfig": { + "healthCheckConfig": { + "path": "/health", + "interval": 60, + "timeout": 30, + "healthyThresholdCount": 2, + "unhealthyThresholdCount": 10 + } + }, + "instanceType": "g5.xlarge", + "modelId": "mistral-vllm", + "modelName": "mistralai/Mistral-7B-Instruct-v0.2", + "modelType": "textgen", + "modelUrl": null, + "status": "Creating", + "streaming": true + }, + { + "autoScalingConfig": null, + "containerConfig": null, + "loadBalancerConfig": null, + "instanceType": null, + "modelId": "titan-express-v1", + "modelName": "bedrock/amazon.titan-text-express-v1", + "modelType": "textgen", + "modelUrl": null, + "status": "InService", + "streaming": true + } + ] +} +``` + +#### Explanation of Response Fields: + +- `modelId`: A unique identifier for the model. 
+- `modelName`: The name of the model, typically referencing the underlying service (Bedrock, SageMaker, etc.). +- `status`: The current state of the model, e.g., "Creating," "InService," or "Failed." +- `streaming`: Whether the model supports streaming inference. +- `instanceType` (optional): The instance type if the model is deployed via ECS. + +### Creating a Model (Admin API) + +LISA provides the `/models` endpoint for creating both ECS and LiteLLM-hosted models. Depending on the request payload, infrastructure will be created or bypassed (e.g., for LiteLLM-only models). + +This API accepts the same model definition parameters that were accepted in the V2 model definitions within the config.yaml file with one notable difference: the `containerConfig.image.path` field is +now omitted because it corresponded with the `inferenceContainer` selection. As a convenience, this path is no longer required. + +#### Request Example: + +``` +POST https://<apigw_endpoint>/models +``` + +#### Example Payload for ECS Model: + +```json +{ + "modelId": "mistral-vllm", + "modelName": "mistralai/Mistral-7B-Instruct-v0.2", + "modelType": "textgen", + "inferenceContainer": "vllm", + "instanceType": "g5.xlarge", + "streaming": true, + "containerConfig": { + "image": { + "baseImage": "vllm/vllm-openai:v0.5.0", + "type": "asset" + }, + "sharedMemorySize": 2048, + "environment": { + "MAX_CONCURRENT_REQUESTS": "128", + "MAX_INPUT_LENGTH": "1024", + "MAX_TOTAL_TOKENS": "2048" + }, + "healthCheckConfig": { + "command": ["CMD-SHELL", "exit 0"], + "interval": 10, + "startPeriod": 30, + "timeout": 5, + "retries": 3 + } + }, + "autoScalingConfig": { + "minCapacity": 1, + "maxCapacity": 1, + "cooldown": 420, + "defaultInstanceWarmup": 180, + "metricConfig": { + "albMetricName": "RequestCountPerTarget", + "targetValue": 30, + "duration": 60, + "estimatedInstanceWarmup": 330 + } + }, + "loadBalancerConfig": { + "healthCheckConfig": { + "path": "/health", + "interval": 60, + "timeout": 30, + "healthyThresholdCount": 
2, + "unhealthyThresholdCount": 10 + } + } +} +``` + +#### Creating a LiteLLM-Only Model: + +```json +{ + "modelId": "titan-express-v1", + "modelName": "bedrock/amazon.titan-text-express-v1", + "modelType": "textgen", + "streaming": true +} +``` + +#### Explanation of Key Fields for Creation Payload: + +- `modelId`: The unique identifier for the model. This is any name you would like it to be. +- `modelName`: The name of the model as it appears in the system. For LISA-hosted models, this must be the S3 Key to your model artifacts, otherwise + this is the LiteLLM-compatible reference to a SageMaker Endpoint or Bedrock Foundation Model. Note: Bedrock and SageMaker resources must exist in the + same region as your LISA deployment. If your LISA installation is in us-east-1, then all SageMaker and Bedrock calls will also happen in us-east-1. + Configuration examples: + - LISA hosting: If your model artifacts are in `s3://${lisa_models_bucket}/path/to/model/weights`, then the `modelName` value here should be `path/to/model/weights` + - LiteLLM-only, Bedrock: If you want to use `amazon.titan-text-lite-v1`, your `modelName` value should be `bedrock/amazon.titan-text-lite-v1` + - LiteLLM-only, SageMaker: If you want to use a SageMaker Endpoint named `my-sm-endpoint`, then the `modelName` value should be `sagemaker/my-sm-endpoint`. +- `modelType`: The type of model, such as text generation (textgen). +- `streaming`: Whether the model supports streaming inference. +- `instanceType`: The type of EC2 instance to be used (only applicable for ECS models). +- `containerConfig`: Details about the Docker container, memory allocation, and environment variables. +- `autoScalingConfig`: Configuration related to ECS autoscaling. +- `loadBalancerConfig`: Health check configuration for load balancers. + +### Deleting a Model (Admin API) + +Admins can delete a model using the following endpoint. Deleting a model removes the infrastructure (ECS) or disconnects from LiteLLM. 
+ +#### Request Example: + +``` +DELETE https:///models/{modelId} +``` + +#### Response Example: + +```json +{ + "status": "success", + "message": "Model mistral-vllm has been deleted successfully." +} +``` + +### Updating a Model + +LISA offers basic updating functionality for both LISA-hosted and LiteLLM-only models. For both types, the model type and streaming support can be updated +in the cases that the models were originally created with the wrong parameters. For example, if an embedding model was accidentally created as a `textgen` +model, the UpdateModel API can be used to set it to the intended `embedding` value. Additionally, for LISA-hosted models, users may update the AutoScaling +configuration to increase or decrease capacity usage for each model. Users may use this API to completely shut down all instances behind a model until +they want to add capacity back to the model for usage later. This feature can help users to effectively manage costs so that instances do not have to stay +running in time periods of little or no expected usage. + +The UpdateModel API has mutually exclusive payload fields to avoid conflicting requests. The API does not allow for shutting off a model at the same time +as updating its AutoScaling configuration, as these would introduce ambiguous intents. The API does not allow for setting AutoScaling limits to 0 and instead +requires the usage of the enable/disable functionality to allow models to fully scale down or turn back on. Metadata updates, such as changing the model type +or streaming compatibility, can happen in either type of update or simply by themselves. + +#### Request Example + +``` +PUT https:///models/{modelId} +``` + +#### Example Payloads + +##### Update Model Metadata + +This payload will simply update the model metadata, which will complete within seconds of invoking. 
If setting a model as an `embedding` model, then the +`streaming` option must be set to `false` or omitted as LISA does not support streaming with embedding models. Both the `streaming` and `modelType` options +may be included in any other update request. + +```json +{ + "streaming": true, + "modelType": "textgen" +} +``` + +##### Update AutoScaling Configuration + +This payload will update the AutoScaling configuration for minimum, maximum, and desired number of instances. The desired number must be between the +minimum and maximum numbers, inclusive, and all the numbers must be strictly greater than 0. If the model currently has fewer instances than the minimum number, then +the desired count will automatically be raised to the minimum if a desired count is not specified. Despite setting a desired capacity, the model will scale down +to the minimum number over time if you are not hitting the scaling thresholds set when creating the model in the first place. + +The AutoScaling configuration **can** be updated while the model is in the Stopped state, but it won't be applied immediately. Instead, the configuration will +be saved until the model is started again, at which point it will use the most recently updated AutoScaling configuration. + +The request will fail if the `autoScalingInstanceConfig` is defined at the same time as the `enabled` field. These options are mutually exclusive and must be +handled as separate operations. Any or all of the options within the `autoScalingInstanceConfig` may be set as needed, so if you only wish to change the `desiredCapacity`, +then that is the only option that you need to specify in the request object within the `autoScalingInstanceConfig`. 
+ +```json +{ + "autoScalingInstanceConfig": { + "minCapacity": 2, + "maxCapacity": 4, + "desiredCapacity": 3 + } +} +``` + +##### Stop Model - Scale Down to 0 Instances + +This payload will stop all model EC2 instances and remove the model reference from LiteLLM so that users are unable to make inference requests against a model +with no capacity. This option is useful for users who wish to manage costs and turn off instances when the model is not currently needed but will be used again +in the future. + +The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be +handled as separate operations. + +```json +{ + "enabled": false +} +``` + +##### Start Model - Restore Previous AutoScaling Configuration + +After stopping a model, this payload will turn the model back on by spinning up instances, waiting for the expected spin-up time to allow models to initialize, and then +adding the reference back to LiteLLM so that users may query the model again. This is expected to be a much faster operation than creating the model through the CreateModel +API, so as long as the model details don't have to change, this in combination with the Stop payload will help to manage costs while still providing model availability as +quickly as the system can spin it up again. + +The request will fail if the `enabled` field is defined at the same time as the `autoScalingInstanceConfig` field. These options are mutually exclusive and must be +handled as separate operations. + +```json +{ + "enabled": true +} +``` diff --git a/lib/docs/admin/overview.md b/lib/docs/admin/overview.md new file mode 100644 index 00000000..223205ba --- /dev/null +++ b/lib/docs/admin/overview.md @@ -0,0 +1,41 @@ +# What is LISA? + +LISA is an infrastructure-as-code solution providing scalable, low latency access to customers’ generative LLMs and +embedding language models. 
LISA accelerates and supports customers’ GenAI experimentation and adoption, particularly in +regions where Amazon Bedrock is not available. LISA allows customers to move quickly rather than independently solve the +undifferentiated heavy lifting of hosting and inference architecture. Customers deploy LISA into a single AWS account +and integrate it with an identity provider. Customers bring their own models to LISA for self-hosting and inference +supported by Amazon Elastic Container Service (ECS). Model configuration is managed through LISA’s model management +APIs. + +As use cases and model requirements grow, customers can configure LISA with external model providers. Through OpenAI's +API spec via the LiteLLM proxy, LISA is compatible with 100+ models from various providers, including Amazon Bedrock and +Amazon Jumpstart. LISA customers can centralize communication across many model providers via LiteLLM, leveraging LISA +for model orchestration. Using LISA as a model orchestration layer allows customers to standardize integrations with +externally hosted models in a single place. Without an orchestration layer, customers must individually manage unique +API integrations with each provider. + +## Key Features + +* Self Host Models: Bring your own text generation and embedding models to LISA for hosting and inference. +* Model Orchestration: Centralize and standardize configuration with 100+ models from model providers via LiteLLM, + including Amazon Bedrock models. +* Chatbot User Interface: Through the chatbot user interface, users can prompt LLMs, receive responses, modify prompt + templates, change model arguments, and manage their session history. Administrators can control available features via + the configuration page. +* Retrieval-augmented generation (RAG): RAG reduces the need for fine-tuning, an expensive and time-consuming + undertaking, and delivers more contextually relevant outputs. 
LISA offers RAG through Amazon OpenSearch or + PostgreSQL’s PGVector extension on Amazon RDS. +* Non-RAG Model Context: Users can upload documents to their chat sessions to enhance responses or support use cases + like document summarization. +* Model Management: Administrators can add, remove, and update models configured with LISA through the model management + configuration page or APIs. +* OpenAI API spec: LISA can be configured with compatible tooling. For example, customers can configure LISA as the + model provider for the Continue plugin, an open-source AI code assistant for JetBrains and Visual Studio Code + integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM + prompting directly in their IDE. +* Libraries: If your workflow includes libraries such as [LangChain](https://python.langchain.com/) or [OpenAI](https://github.com/openai/openai-python), then you + can place LISA in your + application by changing only the endpoint and headers for the client objects. +* FedRAMP: The AWS services that LISA leverages are FedRAMP High compliant. +* Ongoing Releases: We offer ongoing releases with new functionality. LISA’s roadmap is customer-driven. diff --git a/lib/docs/admin/ui-configuration.md b/lib/docs/admin/ui-configuration.md new file mode 100644 index 00000000..9c427086 --- /dev/null +++ b/lib/docs/admin/ui-configuration.md @@ -0,0 +1,32 @@ + +# Chat UI Configuration + +The release of LISA 3.2.0 introduces enhanced administrative controls for the Chat UI, allowing for granular customization of user interfaces. System administrators now have the ability to activate, deactivate, or configure specific components for all users through the application's configuration panel. + +The following features can be managed: + +1. Session History Management + - Activate or deactivate the option to delete session history + +2. Message Information + - Control visibility of message metadata + +3. 
Chat Parameters + - Configure chat Kwargs + - Customize prompt templates + - Adjust chat history buffer settings + +4. Retrieval-Augmented Generation (RAG) Settings + - Modify the number of RAG documents to be included in the retrieval process (TopK) + - Activate or deactivate RAG document uploads + +5. Contextual Document Management + - Control the ability to upload in-context documents + +6. System Banner Customization + - Toggle banner visibility + - Edit banner text + - Customize text color + - Adjust background color + +These new configuration options provide administrators with greater flexibility in tailoring the Chat UI to organizational needs, enhancing both security and user experience across the platform. diff --git a/lib/docs/assets/LisaArchitecture.png b/lib/docs/assets/LisaArchitecture.png new file mode 100644 index 00000000..884e275b Binary files /dev/null and b/lib/docs/assets/LisaArchitecture.png differ diff --git a/lib/docs/assets/LisaChat.png b/lib/docs/assets/LisaChat.png new file mode 100644 index 00000000..f9d958e0 Binary files /dev/null and b/lib/docs/assets/LisaChat.png differ diff --git a/lib/docs/assets/LisaModelManagement.png b/lib/docs/assets/LisaModelManagement.png new file mode 100644 index 00000000..61d47c44 Binary files /dev/null and b/lib/docs/assets/LisaModelManagement.png differ diff --git a/lib/docs/assets/LisaServe.png b/lib/docs/assets/LisaServe.png new file mode 100644 index 00000000..22365e94 Binary files /dev/null and b/lib/docs/assets/LisaServe.png differ diff --git a/lib/docs/config/configuration.md b/lib/docs/config/configuration.md new file mode 100644 index 00000000..520e9ab3 --- /dev/null +++ b/lib/docs/config/configuration.md @@ -0,0 +1,19 @@ +# Minimal Configuration + +Configurations for LISA are split into 2 configuration files, base and custom. The base configuration contains the +recommended properties that can be overridden with the custom properties file. 
The custom configuration should contain +the minimal properties required to deploy LISA, and any optional properties or overrides. This file should be created +at the root of your project (./config-custom.yaml) and needs to contain the following properties: + +```yaml +accountNumber: +region: +s3BucketModels: +authConfig: + authority: + clientId: + adminGroup: + jwtGroupsProperty: +``` + + diff --git a/lib/docs/config/idp.md b/lib/docs/config/idp.md new file mode 100644 index 00000000..b0771014 --- /dev/null +++ b/lib/docs/config/idp.md @@ -0,0 +1,35 @@ +# IDP Configuration Examples + +## AWS Cognito Example: + +In Cognito, the `authority` will be the URL to your User Pool. As an example, if your User Pool ID, not the name, is +`us-east-1_example`, and if it is +running in `us-east-1`, then the URL to put in the `authority` field would be +`https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example`. The `clientId` +can be found in your User Pool's "App integration" tab from within the AWS Management Console, and at the bottom of the +page, you will see the list of clients +and their associated Client IDs. The ID here is what we will need for the `clientId` field. + +```yaml +authConfig: + authority: https://cognito-idp.us-east-1.amazonaws.com/us-east-1_example + clientId: your-client-id + adminGroup: AdminGroup + jwtGroupsProperty: cognito:groups +``` + +## Keycloak Example: + +In Keycloak, the `authority` will be the URL to your Keycloak server. The `clientId` is likely not a random string like +in the Cognito clients, and instead +will be a string configured by your Keycloak administrator. Your administrator will be able to give you a client name or +create a client for you to use for +this application. Once you have this string, use that as the `clientId` within the `authConfig` block. 
+ +```yaml +authConfig: + authority: https://your-keycloak-server.com + clientId: your-client-name + adminGroup: AdminGroup + jwtGroupsProperty: realm_access.roles +``` diff --git a/lib/docs/config/lite-llm.md b/lib/docs/config/lite-llm.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/config/lite-llm.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/config/model-compatibility.md b/lib/docs/config/model-compatibility.md new file mode 100644 index 00000000..8f7ea9b4 --- /dev/null +++ b/lib/docs/config/model-compatibility.md @@ -0,0 +1,30 @@ +# Model Compatibility + +### HuggingFace Generation Models + +For generation models, or causal language models, LISA supports models that are supported by the underlying serving container, TGI. TGI divides compatibility into two categories: optimized models and best effort supported models. The list of optimized models is found [here](https://huggingface.co/docs/text-generation-inference/supported_models). The best effort uses the `transformers` codebase under-the-hood and so should work for most causal models on HuggingFace: + +```python +AutoModelForCausalLM.from_pretrained(, device_map="auto") +``` + +or + +```python +AutoModelForSeq2SeqLM.from_pretrained(, device_map="auto") +``` + +### HuggingFace Embedding Models + +Embedding models often utilize custom codebases and are not as uniform as generation models. For this reason you will +likely need to create a new `inferenceContainer`. Follow +the [example](https://github.com/awslabs/LISA/blob/develop/lib/serve/ecs-model/embedding/instructor) provided for the +`instructor` model. + +### vLLM Models + +In addition to the support we have for the TGI and TEI containers, we support hosting models using the [vLLM container](https://docs.vllm.ai/en/latest/). vLLM abides by the OpenAI specification, and as such allows both text generation and embedding on the models that vLLM supports. 
+See the [deployment](/admin/deploy) section for details on how to set up the vLLM container for your models. Similar to +how the HuggingFace containers will serve safetensor weights downloaded from the +HuggingFace website, vLLM will do the same, and our configuration will allow you to serve these artifacts automatically. vLLM does not have many supported models for embeddings, but as they become available, +LISA will support them as long as the vLLM container version is updated in the config.yaml file and as long as the model's safetensors can be found in S3. diff --git a/lib/docs/config/vector-stores.md b/lib/docs/config/vector-stores.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/config/vector-stores.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/index.md b/lib/docs/index.md new file mode 100644 index 00000000..bccc62ce --- /dev/null +++ b/lib/docs/index.md @@ -0,0 +1,31 @@ +--- +# https://vitepress.dev/reference/default-theme-home-page +layout: home + +hero: + name: "LISA Documentation" + text: "LLM Inference Solution for Amazon Dedicated Cloud (LISA)" + actions: + - theme: brand + text: Getting Started + link: /admin/getting-started + +features: + - title: Authentication and Authorization + details: via AWS Cognito or OpenID Connect (OIDC) providers, ensuring secure access to both the REST API and Chat UI through token-based authentication and role-based access control. + - title: Model Hosting + details: on AWS ECS with autoscaling and efficient traffic management using Application Load Balancers (ALBs), providing scalable and high-performance model inference. + - title: Model Management + details: using AWS Step Functions to orchestrate complex workflows for creating, updating, and deleting models, automatically managing underlying ECS infrastructure. 
+ - title: Inference Requests + details: served via both the REST API and the Chat UI, dynamically routing user inputs to the appropriate ECS-hosted models for real-time inference. + - title: Chat Interface + details: enabling users to interact with LISA through a user-friendly web interface, offering seamless real-time model interaction and session continuity. + - title: Retrieval-Augmented Generation (RAG) Operations + details: leveraging either OpenSearch or PGVector for efficient retrieval of relevant external data to enhance model responses. +--- + +### License Notice + +Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it uses +the third party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes the [LGPL with exceptions](https://github.com/psycopg/psycopg2/blob/master/LICENSE) license. diff --git a/lib/docs/index.ts b/lib/docs/index.ts new file mode 100644 index 00000000..4baab261 --- /dev/null +++ b/lib/docs/index.ts @@ -0,0 +1,178 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +import * as path from 'node:path'; +import * as fs from 'node:fs'; + +import { CfnOutput, RemovalPolicy, Stack, StackProps } from 'aws-cdk-lib'; +import { AwsIntegration, EndpointType, RestApi } from 'aws-cdk-lib/aws-apigateway'; +import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { BlockPublicAccess, Bucket, BucketEncryption } from 'aws-cdk-lib/aws-s3'; +import { BucketDeployment, Source } from 'aws-cdk-lib/aws-s3-deployment'; +import { Construct } from 'constructs'; +import { BaseProps } from '../schema'; + +/** + * Properties for the LisaDocsStack: the shared BaseProps combined with the CDK StackProps. + */ +type DocsProps = {} & BaseProps & StackProps; + +/** + * Stack that hosts the LISA documentation site. + * + * Uploads the local `dist` folder to a private S3 bucket and serves it through a regional + * API Gateway REST API (stage `LISA`), using an IAM role that API Gateway assumes to read the bucket. + * The API URL is exported as the `DocsApiGatewayUrl` stack output. + */ +export class LisaDocsStack extends Stack { + + /** + * Creates the docs bucket, the bucket deployment, the API Gateway read role and the REST API. + * + * The root path serves `index.html`; every other path is proxied to the bucket object named by + * the greedy `{key+}` path parameter, and an S3 403 response is returned to the client as a 404. + * + * @param {Construct} scope - The parent or owner of the construct. + * @param {string} id - The unique identifier for the construct within its scope. + * @param {DocsProps} props - The properties of the construct; `props.config.region` is the region used for the S3 integrations. + */ + constructor (scope: Construct, id: string, props: DocsProps) { + super(scope, id, props); + const { config } = props; + + // Create Docs S3 bucket + const docsBucket = new Bucket(this, 'DocsBucket', { + removalPolicy: RemovalPolicy.DESTROY, + autoDeleteObjects: true, + encryption: BucketEncryption.S3_MANAGED, + enforceSSL: true, + blockPublicAccess: BlockPublicAccess.BLOCK_ALL, + websiteIndexDocument: 'index.html', + websiteErrorDocument: '404.html', + }); + + // Ensure dist folder is created (for tests) + const docsPath = path.join(__dirname, 'dist'); + if (!fs.existsSync(docsPath)) { + fs.mkdirSync(docsPath); + } + // Deploy local folder to S3 + new BucketDeployment(this, 'DeployDocsWebsite', { + sources: [Source.asset(docsPath)], + destinationBucket: docsBucket, + }); + + // REST API GW S3 role + const apiGatewayRole = new Role(this, `${Stack.of(this).stackName}-s3-reader-role`, { + assumedBy: new ServicePrincipal('apigateway.amazonaws.com'), + description: 'Allows API gateway to proxy static website assets', + }); +
docsBucket.grantRead(apiGatewayRole); + + // Create API Gateway + const api = new RestApi(this, 'DocsApi', { + description: 'API Gateway for S3 hosted website', + endpointConfiguration: { types: [EndpointType.REGIONAL] }, + deployOptions: { + stageName: 'LISA', + }, + binaryMediaTypes: ['*/*'], + }); + + const defaultIntegration = new AwsIntegration({ + service: 's3', + region: config.region, + integrationHttpMethod: 'GET', + path: `${docsBucket.bucketName}/index.html`, + options: { + credentialsRole: apiGatewayRole, + integrationResponses: [{ + statusCode: '200', + responseParameters: { + 'method.response.header.Content-Type': '\'text/html\'', + }, + }], + }, + }); + + // Create API Gateway integration with S3 + const s3Integration = new AwsIntegration({ + service: 's3', + region: config.region, + integrationHttpMethod: 'GET', + path: `${docsBucket.bucketName}/{key}`, + options: { + credentialsRole: apiGatewayRole, + requestParameters: { + 'integration.request.path.key': 'method.request.path.key', + }, + integrationResponses: [ + { + statusCode: '200', + responseParameters: { + 'method.response.header.Content-Type': 'integration.response.header.Content-Type', + 'method.response.header.Content-Disposition': 'integration.response.header.Content-Disposition', + 'method.response.header.Content-Length': 'integration.response.header.Content-Length', + }, + }, + { + selectionPattern: '403', + statusCode: '404', + responseParameters: { + 'method.response.header.Content-Type': '\'text/html\'', + }, + responseTemplates: { + 'text/html': `#set($context.responseOverride.header.Content-Type = 'text/html') + #set($context.responseOverride.status = 404) + #set($context.responseOverride.header.Location = "$context.domainName/404.html")`, + }, + }, + ], + }, + }); + + // Add GET method to API Gateway + api.root.addMethod('GET', defaultIntegration, { + methodResponses: [ + { + statusCode: '200', + responseParameters: { + 'method.response.header.Content-Type': true, + }, + }, + ], +
}); + + api.root.addResource('{key+}').addMethod('GET', s3Integration, { + requestParameters: { + 'method.request.path.key': true, + }, + methodResponses: [ + { + statusCode: '200', + responseParameters: { + 'method.response.header.Content-Type': true, + 'method.response.header.Content-Disposition': true, + 'method.response.header.Content-Length': true, + }, + }, + { + statusCode: '404', + responseParameters: { + 'method.response.header.Content-Type': true, + }, + }, + ], + }); + + // Output the API Gateway URL + new CfnOutput(this, 'DocsApiGatewayUrl', { + value: api.url, + description: 'API Gateway URL', + }); + } +} diff --git a/lib/docs/package-lock.json b/lib/docs/package-lock.json new file mode 100644 index 00000000..587c5e5a --- /dev/null +++ b/lib/docs/package-lock.json @@ -0,0 +1,2255 @@ +{ + "name": "lisa-docs", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "lisa-docs", + "version": "1.0.0", + "license": "Apache-2.0", + "devDependencies": { + "vitepress": "^1.4.3" + } + }, + "node_modules/@algolia/autocomplete-core": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/@algolia/autocomplete-core/-/autocomplete-core-1.9.3.tgz", + "integrity": "sha512-009HdfugtGCdC4JdXUbVJClA0q0zh24yyePn+KUGk3rP7j8FEe/m5Yo/z65gn6nP/cM39PxpzqKrL7A6fP6PPw==", + "dev": true, + "dependencies": { + "@algolia/autocomplete-plugin-algolia-insights": "1.9.3", + "@algolia/autocomplete-shared": "1.9.3" + } + }, + "node_modules/@algolia/autocomplete-plugin-algolia-insights": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/@algolia/autocomplete-plugin-algolia-insights/-/autocomplete-plugin-algolia-insights-1.9.3.tgz", + "integrity": "sha512-a/yTUkcO/Vyy+JffmAnTWbr4/90cLzw+CC3bRbhnULr/EM0fGNvM13oQQ14f2moLMcVDyAx/leczLlAOovhSZg==", + "dev": true, + "dependencies": { + "@algolia/autocomplete-shared": "1.9.3" + }, + "peerDependencies": { + "search-insights": ">= 1 < 3" + } + }, +
"node_modules/@algolia/autocomplete-preset-algolia": { + "version": "1.17.6", + "resolved": "https://registry.npmjs.org/@algolia/autocomplete-preset-algolia/-/autocomplete-preset-algolia-1.17.6.tgz", + "integrity": "sha512-Cvg5JENdSCMuClwhJ1ON1/jSuojaYMiUW2KePm18IkdCzPJj/NXojaOxw58RFtQFpJgfVW8h2E8mEoDtLlMdeA==", + "dev": true, + "dependencies": { + "@algolia/autocomplete-shared": "1.17.6" + }, + "peerDependencies": { + "@algolia/client-search": ">= 4.9.1 < 6", + "algoliasearch": ">= 4.9.1 < 6" + } + }, + "node_modules/@algolia/autocomplete-preset-algolia/node_modules/@algolia/autocomplete-shared": { + "version": "1.17.6", + "resolved": "https://registry.npmjs.org/@algolia/autocomplete-shared/-/autocomplete-shared-1.17.6.tgz", + "integrity": "sha512-aq/3V9E00Tw2GC/PqgyPGXtqJUlVc17v4cn1EUhSc+O/4zd04Uwb3UmPm8KDaYQQOrkt1lwvCj2vG2wRE5IKhw==", + "dev": true, + "peerDependencies": { + "@algolia/client-search": ">= 4.9.1 < 6", + "algoliasearch": ">= 4.9.1 < 6" + } + }, + "node_modules/@algolia/autocomplete-shared": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/@algolia/autocomplete-shared/-/autocomplete-shared-1.9.3.tgz", + "integrity": "sha512-Wnm9E4Ye6Rl6sTTqjoymD+l8DjSTHsHboVRYrKgEt8Q7UHm9nYbqhN/i0fhUYA3OAEH7WA8x3jfpnmJm3rKvaQ==", + "dev": true, + "peerDependencies": { + "@algolia/client-search": ">= 4.9.1 < 6", + "algoliasearch": ">= 4.9.1 < 6" + } + }, + "node_modules/@algolia/client-abtesting": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/client-abtesting/-/client-abtesting-5.12.0.tgz", + "integrity": "sha512-hx4eVydkm3yrFCFxmcBtSzI/ykt0cZ6sDWch+v3JTgKpD2WtosMJU3Upv1AjQ4B6COSHCOWEX3vfFxW6OoH6aA==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-analytics": { + "version": "5.12.0", + 
"resolved": "https://registry.npmjs.org/@algolia/client-analytics/-/client-analytics-5.12.0.tgz", + "integrity": "sha512-EpTsSv6IW8maCfXCDIptgT7+mQJj7pImEkcNUnxR8yUKAHzTogTXv9yGm2WXOZFVuwstd2i0sImhQ1Vz8RH/hA==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-common": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/client-common/-/client-common-5.12.0.tgz", + "integrity": "sha512-od3WmO8qxyfNhKc+K3D17tvun3IMs/xMNmxCG9MiElAkYVbPPTRUYMkRneCpmJyQI0hNx2/EA4kZgzVfQjO86Q==", + "dev": true, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-insights": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/client-insights/-/client-insights-5.12.0.tgz", + "integrity": "sha512-8alajmsYUd+7vfX5lpRNdxqv3Xx9clIHLUItyQK0Z6gwGMbVEFe6YYhgDtwslMAP0y6b0WeJEIZJMLgT7VYpRw==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-personalization": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/client-personalization/-/client-personalization-5.12.0.tgz", + "integrity": "sha512-bUV9HtfkTBgpoVhxFrMkmVPG03ZN1Rtn51kiaEtukucdk3ggjR9Qu1YUfRSU2lFgxr9qJc8lTxwfvhjCeJRcqw==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-query-suggestions": { + "version": "5.12.0", + "resolved": 
"https://registry.npmjs.org/@algolia/client-query-suggestions/-/client-query-suggestions-5.12.0.tgz", + "integrity": "sha512-Q5CszzGWfxbIDs9DJ/QJsL7bP6h+lJMg27KxieEnI9KGCu0Jt5iFA3GkREkgRZxRdzlHbZKkrIzhtHVbSHw/rg==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/client-search": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/client-search/-/client-search-5.12.0.tgz", + "integrity": "sha512-R3qzEytgVLHOGNri+bpta6NtTt7YtkvUe/QBcAmMDjW4Jk1P0eBYIPfvnzIPbINRsLxIq9fZs9uAYBgsrts4Zg==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/ingestion": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/@algolia/ingestion/-/ingestion-1.12.0.tgz", + "integrity": "sha512-zpHo6qhR22tL8FsdSI4DvEraPDi/019HmMrCFB/TUX98yzh5ooAU7sNW0qPL1I7+S++VbBmNzJOEU9VI8tEC8A==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/monitoring": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/@algolia/monitoring/-/monitoring-1.12.0.tgz", + "integrity": "sha512-i2AJZED/zf4uhxezAJUhMKoL5QoepCBp2ynOYol0N76+TSoohaMADdPnWCqOULF4RzOwrG8wWynAwBlXsAI1RQ==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + 
"node": ">= 14.0.0" + } + }, + "node_modules/@algolia/recommend": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/recommend/-/recommend-5.12.0.tgz", + "integrity": "sha512-0jmZyKvYnB/Bj5c7WKsKedOUjnr0UtXm0LVFUdQrxXfqOqvWv9n6Vpr65UjdYG4Q49kRQxhlwtal9WJYrYymXg==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-browser-xhr": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-browser-xhr/-/requester-browser-xhr-5.12.0.tgz", + "integrity": "sha512-KxwleraFuVoEGCoeW6Y1RAEbgBMS7SavqeyzWdtkJc6mXeCOJXn1iZitb8Tyn2FcpMNUKlSm0adrUTt7G47+Ow==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-fetch": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-fetch/-/requester-fetch-5.12.0.tgz", + "integrity": "sha512-FuDZXUGU1pAg2HCnrt8+q1VGHKChV/LhvjvZlLOT7e56GJie6p+EuLu4/hMKPOVuQQ8XXtrTHKIU3Lw+7O5/bQ==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@algolia/requester-node-http": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/@algolia/requester-node-http/-/requester-node-http-5.12.0.tgz", + "integrity": "sha512-ncDDY7CxZhMs6LIoPl+vHFQceIBhYPY5EfuGF1V7beO0U38xfsCYEyutEFB2kRzf4D9Gqppn3iWX71sNtrKcuw==", + "dev": true, + "dependencies": { + "@algolia/client-common": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", + "integrity": 
"sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", + "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.26.2", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.26.2.tgz", + "integrity": "sha512-DWMCZH9WA4Maitz2q21SRKHo9QXZxkDsbNZoVD62gusNtNBBqDg9i7uOhASfTfIGNzW+O+r7+jAlM8dwphcJKQ==", + "dev": true, + "dependencies": { + "@babel/types": "^7.26.0" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/types": { + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz", + "integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==", + "dev": true, + "dependencies": { + "@babel/helper-string-parser": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@docsearch/css": { + "version": "3.6.3", + "resolved": "https://registry.npmjs.org/@docsearch/css/-/css-3.6.3.tgz", + "integrity": "sha512-3uvbg8E7rhqE1C4oBAK3tGlS2qfhi9zpfZgH/yjDPF73vd9B41urVIKujF4rczcF4E3qs34SedhehiDJ4UdNBA==", + "dev": true + }, + "node_modules/@docsearch/js": { + "version": "3.6.3", + "resolved": "https://registry.npmjs.org/@docsearch/js/-/js-3.6.3.tgz", + "integrity": "sha512-2mBFomaN6VijyQQGwieERDu9GeE0hlv9TQRZBTOYsPQW7/vqtd4hnHEkbBbaBRiS4PYcy+UhikbMuDExJs63UA==", + "dev": true, + "dependencies": { + "@docsearch/react": "3.6.3", + "preact": "^10.0.0" + } + }, + 
"node_modules/@docsearch/react": { + "version": "3.6.3", + "resolved": "https://registry.npmjs.org/@docsearch/react/-/react-3.6.3.tgz", + "integrity": "sha512-2munr4uBuZq1PG+Ge+F+ldIdxb3Wi8OmEIv2tQQb4RvEvvph+xtQkxwHzVIEnt5s+HecwucuXwB+3JhcZboFLg==", + "dev": true, + "dependencies": { + "@algolia/autocomplete-core": "1.9.3", + "@algolia/autocomplete-preset-algolia": "1.17.6", + "@docsearch/css": "3.6.3", + "algoliasearch": "^5.11.0" + }, + "peerDependencies": { + "@types/react": ">= 16.8.0 < 19.0.0", + "react": ">= 16.8.0 < 19.0.0", + "react-dom": ">= 16.8.0 < 19.0.0", + "search-insights": ">= 1 < 3" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "react": { + "optional": true + }, + "react-dom": { + "optional": true + }, + "search-insights": { + "optional": true + } + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", + "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", + "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", + "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + 
}, + "node_modules/@esbuild/android-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", + "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", + "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", + "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", + "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", + "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/linux-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", + "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", + "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", + "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", + "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", + "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/linux-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", + "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", + "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", + "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", + "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", + "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/openbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", + "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/sourcemap-codec": 
{ + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.24.3.tgz", + "integrity": "sha512-ufb2CH2KfBWPJok95frEZZ82LtDl0A6QKTa8MoM+cWwDZvVGl5/jNb79pIhRvAalUu+7LD91VYR0nwRD799HkQ==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.24.3.tgz", + "integrity": "sha512-iAHpft/eQk9vkWIV5t22V77d90CRofgR2006UiCjHcHJFVI1E0oBkQIAbz+pLtthFw3hWEmVB4ilxGyBf48i2Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.24.3.tgz", + "integrity": "sha512-QPW2YmkWLlvqmOa2OwrfqLJqkHm7kJCIMq9kOz40Zo9Ipi40kf9ONG5Sz76zszrmIZZ4hgRIkez69YnTHgEz1w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.24.3.tgz", + "integrity": "sha512-KO0pN5x3+uZm1ZXeIfDqwcvnQ9UEGN8JX5ufhmgH5Lz4ujjZMAnxQygZAVGemFWn+ZZC0FQopruV4lqmGMshow==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.24.3.tgz", + "integrity": 
"sha512-CsC+ZdIiZCZbBI+aRlWpYJMSWvVssPuWqrDy/zi9YfnatKKSLFCe6fjna1grHuo/nVaHG+kiglpRhyBQYRTK4A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.24.3.tgz", + "integrity": "sha512-F0nqiLThcfKvRQhZEzMIXOQG4EeX61im61VYL1jo4eBxv4aZRmpin6crnBJQ/nWnCsjH5F6J3W6Stdm0mBNqBg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.24.3.tgz", + "integrity": "sha512-KRSFHyE/RdxQ1CSeOIBVIAxStFC/hnBgVcaiCkQaVC+EYDtTe4X7z5tBkFyRoBgUGtB6Xg6t9t2kulnX6wJc6A==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.24.3.tgz", + "integrity": "sha512-h6Q8MT+e05zP5BxEKz0vi0DhthLdrNEnspdLzkoFqGwnmOzakEHSlXfVyA4HJ322QtFy7biUAVFPvIDEDQa6rw==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.24.3.tgz", + "integrity": "sha512-fKElSyXhXIJ9pqiYRqisfirIo2Z5pTTve5K438URf08fsypXrEkVmShkSfM8GJ1aUyvjakT+fn2W7Czlpd/0FQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.24.3.tgz", + "integrity": 
"sha512-YlddZSUk8G0px9/+V9PVilVDC6ydMz7WquxozToozSnfFK6wa6ne1ATUjUvjin09jp34p84milxlY5ikueoenw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-powerpc64le-gnu": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.24.3.tgz", + "integrity": "sha512-yNaWw+GAO8JjVx3s3cMeG5Esz1cKVzz8PkTJSfYzE5u7A+NvGmbVFEHP+BikTIyYWuz0+DX9kaA3pH9Sqxp69g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.24.3.tgz", + "integrity": "sha512-lWKNQfsbpv14ZCtM/HkjCTm4oWTKTfxPmr7iPfp3AHSqyoTz5AgLemYkWLwOBWc+XxBbrU9SCokZP0WlBZM9lA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.24.3.tgz", + "integrity": "sha512-HoojGXTC2CgCcq0Woc/dn12wQUlkNyfH0I1ABK4Ni9YXyFQa86Fkt2Q0nqgLfbhkyfQ6003i3qQk9pLh/SpAYw==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.24.3.tgz", + "integrity": "sha512-mnEOh4iE4USSccBOtcrjF5nj+5/zm6NcNhbSEfR3Ot0pxBwvEn5QVUXcuOwwPkapDtGZ6pT02xLoPaNv06w7KQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.24.3.tgz", + "integrity": 
"sha512-rMTzawBPimBQkG9NKpNHvquIUTQPzrnPxPbCY1Xt+mFkW7pshvyIS5kYgcf74goxXOQk0CP3EoOC1zcEezKXhw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.24.3.tgz", + "integrity": "sha512-2lg1CE305xNvnH3SyiKwPVsTVLCg4TmNCF1z7PSHX2uZY2VbUpdkgAllVoISD7JO7zu+YynpWNSKAtOrX3AiuA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.24.3.tgz", + "integrity": "sha512-9SjYp1sPyxJsPWuhOCX6F4jUMXGbVVd5obVpoVEi8ClZqo52ViZewA6eFz85y8ezuOA+uJMP5A5zo6Oz4S5rVQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.24.3.tgz", + "integrity": "sha512-HGZgRFFYrMrP3TJlq58nR1xy8zHKId25vhmm5S9jETEfDf6xybPxsavFTJaufe2zgOGYJBskGlj49CwtEuFhWQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@shikijs/core": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/@shikijs/core/-/core-1.22.2.tgz", + "integrity": "sha512-bvIQcd8BEeR1yFvOYv6HDiyta2FFVePbzeowf5pPS1avczrPK+cjmaxxh0nx5QzbON7+Sv0sQfQVciO7bN72sg==", + "dev": true, + "dependencies": { + "@shikijs/engine-javascript": "1.22.2", + "@shikijs/engine-oniguruma": "1.22.2", + "@shikijs/types": "1.22.2", + "@shikijs/vscode-textmate": "^9.3.0", + "@types/hast": "^3.0.4", + "hast-util-to-html": "^9.0.3" + } + }, + "node_modules/@shikijs/engine-javascript": { + "version": "1.22.2", + "resolved": 
"https://registry.npmjs.org/@shikijs/engine-javascript/-/engine-javascript-1.22.2.tgz", + "integrity": "sha512-iOvql09ql6m+3d1vtvP8fLCVCK7BQD1pJFmHIECsujB0V32BJ0Ab6hxk1ewVSMFA58FI0pR2Had9BKZdyQrxTw==", + "dev": true, + "dependencies": { + "@shikijs/types": "1.22.2", + "@shikijs/vscode-textmate": "^9.3.0", + "oniguruma-to-js": "0.4.3" + } + }, + "node_modules/@shikijs/engine-oniguruma": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/@shikijs/engine-oniguruma/-/engine-oniguruma-1.22.2.tgz", + "integrity": "sha512-GIZPAGzQOy56mGvWMoZRPggn0dTlBf1gutV5TdceLCZlFNqWmuc7u+CzD0Gd9vQUTgLbrt0KLzz6FNprqYAxlA==", + "dev": true, + "dependencies": { + "@shikijs/types": "1.22.2", + "@shikijs/vscode-textmate": "^9.3.0" + } + }, + "node_modules/@shikijs/transformers": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/@shikijs/transformers/-/transformers-1.22.2.tgz", + "integrity": "sha512-8f78OiBa6pZDoZ53lYTmuvpFPlWtevn23bzG+azpPVvZg7ITax57o/K3TC91eYL3OMJOO0onPbgnQyZjRos8XQ==", + "dev": true, + "dependencies": { + "shiki": "1.22.2" + } + }, + "node_modules/@shikijs/types": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/@shikijs/types/-/types-1.22.2.tgz", + "integrity": "sha512-NCWDa6LGZqTuzjsGfXOBWfjS/fDIbDdmVDug+7ykVe1IKT4c1gakrvlfFYp5NhAXH/lyqLM8wsAPo5wNy73Feg==", + "dev": true, + "dependencies": { + "@shikijs/vscode-textmate": "^9.3.0", + "@types/hast": "^3.0.4" + } + }, + "node_modules/@shikijs/vscode-textmate": { + "version": "9.3.0", + "resolved": "https://registry.npmjs.org/@shikijs/vscode-textmate/-/vscode-textmate-9.3.0.tgz", + "integrity": "sha512-jn7/7ky30idSkd/O5yDBfAnVt+JJpepofP/POZ1iMOxK59cOfqIgg/Dj0eFsjOTMw+4ycJN0uhZH/Eb0bs/EUA==", + "dev": true + }, + "node_modules/@types/estree": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.6.tgz", + "integrity": "sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==", + "dev": true + }, 
+ "node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "dev": true, + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/@types/linkify-it": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@types/linkify-it/-/linkify-it-5.0.0.tgz", + "integrity": "sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==", + "dev": true + }, + "node_modules/@types/markdown-it": { + "version": "14.1.2", + "resolved": "https://registry.npmjs.org/@types/markdown-it/-/markdown-it-14.1.2.tgz", + "integrity": "sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==", + "dev": true, + "dependencies": { + "@types/linkify-it": "^5", + "@types/mdurl": "^2" + } + }, + "node_modules/@types/mdast": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", + "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", + "dev": true, + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/@types/mdurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@types/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==", + "dev": true + }, + "node_modules/@types/unist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", + "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", + "dev": true + }, + "node_modules/@types/web-bluetooth": { + "version": "0.0.20", + "resolved": "https://registry.npmjs.org/@types/web-bluetooth/-/web-bluetooth-0.0.20.tgz", + "integrity": 
"sha512-g9gZnnXVq7gM7v3tJCWV/qw7w+KeOlSHAhgF9RytFyifW6AF61hdT2ucrYhPq9hLs5JIryeupHV3qGk95dH9ow==", + "dev": true + }, + "node_modules/@ungap/structured-clone": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", + "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==", + "dev": true + }, + "node_modules/@vitejs/plugin-vue": { + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/@vitejs/plugin-vue/-/plugin-vue-5.1.4.tgz", + "integrity": "sha512-N2XSI2n3sQqp5w7Y/AN/L2XDjBIRGqXko+eDp42sydYSBeJuSm5a1sLf8zakmo8u7tA8NmBgoDLA1HeOESjp9A==", + "dev": true, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "vite": "^5.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@vue/compiler-core": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.12.tgz", + "integrity": "sha512-ISyBTRMmMYagUxhcpyEH0hpXRd/KqDU4ymofPgl2XAkY9ZhQ+h0ovEZJIiPop13UmR/54oA2cgMDjgroRelaEw==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.25.3", + "@vue/shared": "3.5.12", + "entities": "^4.5.0", + "estree-walker": "^2.0.2", + "source-map-js": "^1.2.0" + } + }, + "node_modules/@vue/compiler-dom": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/compiler-dom/-/compiler-dom-3.5.12.tgz", + "integrity": "sha512-9G6PbJ03uwxLHKQ3P42cMTi85lDRvGLB2rSGOiQqtXELat6uI4n8cNz9yjfVHRPIu+MsK6TE418Giruvgptckg==", + "dev": true, + "dependencies": { + "@vue/compiler-core": "3.5.12", + "@vue/shared": "3.5.12" + } + }, + "node_modules/@vue/compiler-sfc": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.12.tgz", + "integrity": "sha512-2k973OGo2JuAa5+ZlekuQJtitI5CgLMOwgl94BzMCsKZCX/xiqzJYzapl4opFogKHqwJk34vfsaKpfEhd1k5nw==", + "dev": true, + "dependencies": { + "@babel/parser": "^7.25.3", + "@vue/compiler-core": "3.5.12", + 
"@vue/compiler-dom": "3.5.12", + "@vue/compiler-ssr": "3.5.12", + "@vue/shared": "3.5.12", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.11", + "postcss": "^8.4.47", + "source-map-js": "^1.2.0" + } + }, + "node_modules/@vue/compiler-ssr": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.12.tgz", + "integrity": "sha512-eLwc7v6bfGBSM7wZOGPmRavSWzNFF6+PdRhE+VFJhNCgHiF8AM7ccoqcv5kBXA2eWUfigD7byekvf/JsOfKvPA==", + "dev": true, + "dependencies": { + "@vue/compiler-dom": "3.5.12", + "@vue/shared": "3.5.12" + } + }, + "node_modules/@vue/devtools-api": { + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/@vue/devtools-api/-/devtools-api-7.6.2.tgz", + "integrity": "sha512-NCT0ujqlwAhoFvCsAG7G5qS8w/A/dhvFSt2BhmNxyqgpYDrf9CG1zYyWLQkE3dsZ+5lCT6ULUic2VKNaE07Vzg==", + "dev": true, + "dependencies": { + "@vue/devtools-kit": "^7.6.2" + } + }, + "node_modules/@vue/devtools-kit": { + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/@vue/devtools-kit/-/devtools-kit-7.6.2.tgz", + "integrity": "sha512-k61BxHRmcTtIQZFouF9QWt9nCCNtSdw12lhg8VNtHq5/XOBGD+ewiK27a40UJ8UPYoCJvi80hbvbYr5E/Zeu1g==", + "dev": true, + "dependencies": { + "@vue/devtools-shared": "^7.6.2", + "birpc": "^0.2.19", + "hookable": "^5.5.3", + "mitt": "^3.0.1", + "perfect-debounce": "^1.0.0", + "speakingurl": "^14.0.1", + "superjson": "^2.2.1" + } + }, + "node_modules/@vue/devtools-shared": { + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/@vue/devtools-shared/-/devtools-shared-7.6.2.tgz", + "integrity": "sha512-lcjyJ7hCC0W0kNwnCGMLVTMvDLoZgjcq9BvboPgS+6jQyDul7fpzRSKTGtGhCHoxrDox7qBAKGbAl2Rcf7GE1A==", + "dev": true, + "dependencies": { + "rfdc": "^1.4.1" + } + }, + "node_modules/@vue/reactivity": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/reactivity/-/reactivity-3.5.12.tgz", + "integrity": "sha512-UzaN3Da7xnJXdz4Okb/BGbAaomRHc3RdoWqTzlvd9+WBR5m3J39J1fGcHes7U3za0ruYn/iYy/a1euhMEHvTAg==", + 
"dev": true, + "dependencies": { + "@vue/shared": "3.5.12" + } + }, + "node_modules/@vue/runtime-core": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/runtime-core/-/runtime-core-3.5.12.tgz", + "integrity": "sha512-hrMUYV6tpocr3TL3Ad8DqxOdpDe4zuQY4HPY3X/VRh+L2myQO8MFXPAMarIOSGNu0bFAjh1yBkMPXZBqCk62Uw==", + "dev": true, + "dependencies": { + "@vue/reactivity": "3.5.12", + "@vue/shared": "3.5.12" + } + }, + "node_modules/@vue/runtime-dom": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/runtime-dom/-/runtime-dom-3.5.12.tgz", + "integrity": "sha512-q8VFxR9A2MRfBr6/55Q3umyoN7ya836FzRXajPB6/Vvuv0zOPL+qltd9rIMzG/DbRLAIlREmnLsplEF/kotXKA==", + "dev": true, + "dependencies": { + "@vue/reactivity": "3.5.12", + "@vue/runtime-core": "3.5.12", + "@vue/shared": "3.5.12", + "csstype": "^3.1.3" + } + }, + "node_modules/@vue/server-renderer": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/server-renderer/-/server-renderer-3.5.12.tgz", + "integrity": "sha512-I3QoeDDeEPZm8yR28JtY+rk880Oqmj43hreIBVTicisFTx/Dl7JpG72g/X7YF8hnQD3IFhkky5i2bPonwrTVPg==", + "dev": true, + "dependencies": { + "@vue/compiler-ssr": "3.5.12", + "@vue/shared": "3.5.12" + }, + "peerDependencies": { + "vue": "3.5.12" + } + }, + "node_modules/@vue/shared": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.12.tgz", + "integrity": "sha512-L2RPSAwUFbgZH20etwrXyVyCBu9OxRSi8T/38QsvnkJyvq2LufW2lDCOzm7t/U9C1mkhJGWYfCuFBCmIuNivrg==", + "dev": true + }, + "node_modules/@vueuse/core": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/@vueuse/core/-/core-11.2.0.tgz", + "integrity": "sha512-JIUwRcOqOWzcdu1dGlfW04kaJhW3EXnnjJJfLTtddJanymTL7lF1C0+dVVZ/siLfc73mWn+cGP1PE1PKPruRSA==", + "dev": true, + "dependencies": { + "@types/web-bluetooth": "^0.0.20", + "@vueuse/metadata": "11.2.0", + "@vueuse/shared": "11.2.0", + "vue-demi": ">=0.14.10" + }, + "funding": { + "url": 
"https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/core/node_modules/vue-demi": { + "version": "0.14.10", + "resolved": "https://registry.npmjs.org/vue-demi/-/vue-demi-0.14.10.tgz", + "integrity": "sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/@vueuse/integrations": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/@vueuse/integrations/-/integrations-11.2.0.tgz", + "integrity": "sha512-zGXz3dsxNHKwiD9jPMvR3DAxQEOV6VWIEYTGVSB9PNpk4pTWR+pXrHz9gvXWcP2sTk3W2oqqS6KwWDdntUvNVA==", + "dev": true, + "dependencies": { + "@vueuse/core": "11.2.0", + "@vueuse/shared": "11.2.0", + "vue-demi": ">=0.14.10" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "async-validator": "^4", + "axios": "^1", + "change-case": "^5", + "drauu": "^0.4", + "focus-trap": "^7", + "fuse.js": "^7", + "idb-keyval": "^6", + "jwt-decode": "^4", + "nprogress": "^0.2", + "qrcode": "^1.5", + "sortablejs": "^1", + "universal-cookie": "^7" + }, + "peerDependenciesMeta": { + "async-validator": { + "optional": true + }, + "axios": { + "optional": true + }, + "change-case": { + "optional": true + }, + "drauu": { + "optional": true + }, + "focus-trap": { + "optional": true + }, + "fuse.js": { + "optional": true + }, + "idb-keyval": { + "optional": true + }, + "jwt-decode": { + "optional": true + }, + "nprogress": { + "optional": true + }, + "qrcode": { + "optional": true + }, + "sortablejs": { + "optional": true + }, + "universal-cookie": 
{ + "optional": true + } + } + }, + "node_modules/@vueuse/integrations/node_modules/vue-demi": { + "version": "0.14.10", + "resolved": "https://registry.npmjs.org/vue-demi/-/vue-demi-0.14.10.tgz", + "integrity": "sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/@vueuse/metadata": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/@vueuse/metadata/-/metadata-11.2.0.tgz", + "integrity": "sha512-L0ZmtRmNx+ZW95DmrgD6vn484gSpVeRbgpWevFKXwqqQxW9hnSi2Ppuh2BzMjnbv4aJRiIw8tQatXT9uOB23dQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/shared": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/@vueuse/shared/-/shared-11.2.0.tgz", + "integrity": "sha512-VxFjie0EanOudYSgMErxXfq6fo8vhr5ICI+BuE3I9FnX7ePllEsVrRQ7O6Q1TLgApeLuPKcHQxAXpP+KnlrJsg==", + "dev": true, + "dependencies": { + "vue-demi": ">=0.14.10" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/shared/node_modules/vue-demi": { + "version": "0.14.10", + "resolved": "https://registry.npmjs.org/vue-demi/-/vue-demi-0.14.10.tgz", + "integrity": "sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "vue-demi-fix": "bin/vue-demi-fix.js", + "vue-demi-switch": "bin/vue-demi-switch.js" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + 
"peerDependencies": { + "@vue/composition-api": "^1.0.0-rc.1", + "vue": "^3.0.0-0 || ^2.6.0" + }, + "peerDependenciesMeta": { + "@vue/composition-api": { + "optional": true + } + } + }, + "node_modules/algoliasearch": { + "version": "5.12.0", + "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-5.12.0.tgz", + "integrity": "sha512-psGBRYdGgik8I6m28iAB8xpubvjEt7UQU+w5MAJUA2324WHiGoHap5BPkkjB14rMaXeRts6pmOsrVIglGyOVwg==", + "dev": true, + "dependencies": { + "@algolia/client-abtesting": "5.12.0", + "@algolia/client-analytics": "5.12.0", + "@algolia/client-common": "5.12.0", + "@algolia/client-insights": "5.12.0", + "@algolia/client-personalization": "5.12.0", + "@algolia/client-query-suggestions": "5.12.0", + "@algolia/client-search": "5.12.0", + "@algolia/ingestion": "1.12.0", + "@algolia/monitoring": "1.12.0", + "@algolia/recommend": "5.12.0", + "@algolia/requester-browser-xhr": "5.12.0", + "@algolia/requester-fetch": "5.12.0", + "@algolia/requester-node-http": "5.12.0" + }, + "engines": { + "node": ">= 14.0.0" + } + }, + "node_modules/birpc": { + "version": "0.2.19", + "resolved": "https://registry.npmjs.org/birpc/-/birpc-0.2.19.tgz", + "integrity": "sha512-5WeXXAvTmitV1RqJFppT5QtUiz2p1mRSYU000Jkft5ZUCLJIk4uQriYNO50HknxKwM6jd8utNc66K1qGIwwWBQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/ccount": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", + "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-html4": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", + "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", + 
"dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-legacy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", + "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/copy-anything": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/copy-anything/-/copy-anything-3.0.5.tgz", + "integrity": "sha512-yCEafptTtb4bk7GLEQoM8KVJpxAfdBJYaXyzQEgQQQgYrZiDp8SJmGKlYza6CYjEDNstAdNdKA3UuoULlEbS6w==", + "dev": true, + "dependencies": { + "is-what": "^4.1.8" + }, + "engines": { + "node": ">=12.13" + }, + "funding": { + "url": "https://github.com/sponsors/mesqueeb" + } + }, + "node_modules/csstype": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", + "dev": true + }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "dev": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": 
"sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "dev": true, + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "dev": true, + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/esbuild": { + "version": "0.21.5", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.21.5.tgz", + "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": "0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": 
"sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==", + "dev": true + }, + "node_modules/focus-trap": { + "version": "7.6.0", + "resolved": "https://registry.npmjs.org/focus-trap/-/focus-trap-7.6.0.tgz", + "integrity": "sha512-1td0l3pMkWJLFipobUcGaf+5DTY4PLDDrcqoSaKP8ediO/CoWCCYk/fT/Y2A4e6TNB+Sh6clRJCjOPPnKoNHnQ==", + "dev": true, + "dependencies": { + "tabbable": "^6.2.0" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/hast-util-to-html": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-9.0.3.tgz", + "integrity": "sha512-M17uBDzMJ9RPCqLMO92gNNUDuBSq10a25SDBI08iCCxmorf4Yy6sYHK57n9WAbRAAaU+DuR4W6GN9K4DFZesYg==", + "dev": true, + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-whitespace": "^3.0.0", + "html-void-elements": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0", + "stringify-entities": "^4.0.0", + "zwitch": "^2.0.4" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", + "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", + "dev": true, + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/unified" + } + }, + "node_modules/hookable": { + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/hookable/-/hookable-5.5.3.tgz", + "integrity": "sha512-Yc+BQe8SvoXH1643Qez1zqLRmbA5rCL+sSmk6TVos0LWVfNIB7PGncdlId77WzLGSIB5KaWgTaNTs2lNVEI6VQ==", + "dev": true + }, + "node_modules/html-void-elements": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz", + "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-what": { + "version": "4.1.16", + "resolved": "https://registry.npmjs.org/is-what/-/is-what-4.1.16.tgz", + "integrity": "sha512-ZhMwEosbFJkA0YhFnNDgTM4ZxDRsS6HqTo7qsZM08fehyRYIYa0yHu5R6mgo1n/8MgaPBXiPimPD77baVFYg+A==", + "dev": true, + "engines": { + "node": ">=12.13" + }, + "funding": { + "url": "https://github.com/sponsors/mesqueeb" + } + }, + "node_modules/magic-string": { + "version": "0.30.12", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.12.tgz", + "integrity": "sha512-Ea8I3sQMVXr8JhN4z+H/d8zwo+tYDgHE9+5G4Wnrwhs0gaK9fXTKx0Tw5Xwsd/bCPTTZNRAdpyzvoeORe9LYpw==", + "dev": true, + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0" + } + }, + "node_modules/mark.js": { + "version": "8.11.1", + "resolved": "https://registry.npmjs.org/mark.js/-/mark.js-8.11.1.tgz", + "integrity": "sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ==", + "dev": true + }, + "node_modules/mdast-util-to-hast": { + "version": "13.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", + "integrity": "sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==", + "dev": true, + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": 
"^4.0.0", + "@ungap/structured-clone": "^1.0.0", + "devlop": "^1.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "trim-lines": "^3.0.0", + "unist-util-position": "^5.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-util-character": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.0.tgz", + "integrity": "sha512-KvOVV+X1yLBfs9dCBSopq/+G1PcgT3lAK07mC4BzXi5E7ahzMAF8oIupDDJ6mievI6F+lAATkbQQlQixJfT3aQ==", + "dev": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.0.tgz", + "integrity": "sha512-pS+ROfCXAGLWCOc8egcBvT0kf27GoWMqtdarNfDcjb6YLuV5cM3ioG45Ys2qOVqeqSbjaKg72vU+Wby3eddPsA==", + "dev": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.0.tgz", + "integrity": "sha512-WhYv5UEcZrbAtlsnPuChHUAsu/iBPOVaEVsntLBIdpibO0ddy8OzavZz3iL2xVvBZOpolujSliP65Kq0/7KIYw==", + "dev": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-encode": 
"^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.0.tgz", + "integrity": "sha512-8JZt9ElZ5kyTnO94muPxIGS8oyElRJaiJO8EzV6ZSyGQ1Is8xwl4Q45qU5UOg+bGH4AikWziz0iN4sFLWs8PGw==", + "dev": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/micromark-util-types": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.0.tgz", + "integrity": "sha512-oNh6S2WMHWRZrmutsRmDDfkzKtxF+bc2VxLC9dvtrDIRFln627VsFP6fLMgTryGDljgLPjkrzQSDcPrjPyDJ5w==", + "dev": true, + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ] + }, + "node_modules/minisearch": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/minisearch/-/minisearch-7.1.0.tgz", + "integrity": "sha512-tv7c/uefWdEhcu6hvrfTihflgeEi2tN6VV7HJnCjK6VxM75QQJh4t9FwJCsA2EsRS8LCnu3W87CuGPWMocOLCA==", + "dev": true + }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==", + "dev": true + }, + "node_modules/nanoid": { + "version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + 
"node_modules/oniguruma-to-js": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/oniguruma-to-js/-/oniguruma-to-js-0.4.3.tgz", + "integrity": "sha512-X0jWUcAlxORhOqqBREgPMgnshB7ZGYszBNspP+tS9hPD3l13CdaXcHbgImoHUHlrvGx/7AvFEkTRhAGYh+jzjQ==", + "dev": true, + "dependencies": { + "regex": "^4.3.2" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/perfect-debounce": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/perfect-debounce/-/perfect-debounce-1.0.0.tgz", + "integrity": "sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==", + "dev": true + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true + }, + "node_modules/postcss": { + "version": "8.4.47", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", + "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", + "dev": true, + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.7", + "picocolors": "^1.1.0", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/preact": { + "version": "10.24.3", + "resolved": "https://registry.npmjs.org/preact/-/preact-10.24.3.tgz", + "integrity": "sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==", + "dev": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/preact" + } + }, + "node_modules/property-information": { + 
"version": "6.5.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", + "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/regex": { + "version": "4.3.3", + "resolved": "https://registry.npmjs.org/regex/-/regex-4.3.3.tgz", + "integrity": "sha512-r/AadFO7owAq1QJVeZ/nq9jNS1vyZt+6t1p/E59B56Rn2GCya+gr1KSyOzNL/er+r+B7phv5jG2xU2Nz1YkmJg==", + "dev": true + }, + "node_modules/rfdc": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/rfdc/-/rfdc-1.4.1.tgz", + "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", + "dev": true + }, + "node_modules/rollup": { + "version": "4.24.3", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.24.3.tgz", + "integrity": "sha512-HBW896xR5HGmoksbi3JBDtmVzWiPAYqp7wip50hjQ67JbDz61nyoMPdqu1DvVW9asYb2M65Z20ZHsyJCMqMyDg==", + "dev": true, + "dependencies": { + "@types/estree": "1.0.6" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.24.3", + "@rollup/rollup-android-arm64": "4.24.3", + "@rollup/rollup-darwin-arm64": "4.24.3", + "@rollup/rollup-darwin-x64": "4.24.3", + "@rollup/rollup-freebsd-arm64": "4.24.3", + "@rollup/rollup-freebsd-x64": "4.24.3", + "@rollup/rollup-linux-arm-gnueabihf": "4.24.3", + "@rollup/rollup-linux-arm-musleabihf": "4.24.3", + "@rollup/rollup-linux-arm64-gnu": "4.24.3", + "@rollup/rollup-linux-arm64-musl": "4.24.3", + "@rollup/rollup-linux-powerpc64le-gnu": "4.24.3", + "@rollup/rollup-linux-riscv64-gnu": "4.24.3", + "@rollup/rollup-linux-s390x-gnu": "4.24.3", + "@rollup/rollup-linux-x64-gnu": "4.24.3", + "@rollup/rollup-linux-x64-musl": "4.24.3", + "@rollup/rollup-win32-arm64-msvc": 
"4.24.3", + "@rollup/rollup-win32-ia32-msvc": "4.24.3", + "@rollup/rollup-win32-x64-msvc": "4.24.3", + "fsevents": "~2.3.2" + } + }, + "node_modules/search-insights": { + "version": "2.17.2", + "resolved": "https://registry.npmjs.org/search-insights/-/search-insights-2.17.2.tgz", + "integrity": "sha512-zFNpOpUO+tY2D85KrxJ+aqwnIfdEGi06UH2+xEb+Bp9Mwznmauqc9djbnBibJO5mpfUPPa8st6Sx65+vbeO45g==", + "dev": true, + "peer": true + }, + "node_modules/shiki": { + "version": "1.22.2", + "resolved": "https://registry.npmjs.org/shiki/-/shiki-1.22.2.tgz", + "integrity": "sha512-3IZau0NdGKXhH2bBlUk4w1IHNxPh6A5B2sUpyY+8utLu2j/h1QpFkAaUA1bAMxOWWGtTWcAh531vnS4NJKS/lA==", + "dev": true, + "dependencies": { + "@shikijs/core": "1.22.2", + "@shikijs/engine-javascript": "1.22.2", + "@shikijs/engine-oniguruma": "1.22.2", + "@shikijs/types": "1.22.2", + "@shikijs/vscode-textmate": "^9.3.0", + "@types/hast": "^3.0.4" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/speakingurl": { + "version": "14.0.1", + "resolved": "https://registry.npmjs.org/speakingurl/-/speakingurl-14.0.1.tgz", + "integrity": "sha512-1POYv7uv2gXoyGFpBCmpDVSNV74IfsWlDW216UPjbWufNf+bSU6GdbDsxdcxtfwb4xlI3yxzOTKClUosxARYrQ==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/stringify-entities": { + "version": "4.0.4", + "resolved": 
"https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", + "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", + "dev": true, + "dependencies": { + "character-entities-html4": "^2.0.0", + "character-entities-legacy": "^3.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/superjson": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/superjson/-/superjson-2.2.1.tgz", + "integrity": "sha512-8iGv75BYOa0xRJHK5vRLEjE2H/i4lulTjzpUXic3Eg8akftYjkmQDa8JARQ42rlczXyFR3IeRoeFCc7RxHsYZA==", + "dev": true, + "dependencies": { + "copy-anything": "^3.0.2" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tabbable": { + "version": "6.2.0", + "resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz", + "integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==", + "dev": true + }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/unist-util-is": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", + "integrity": "sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", + "integrity": 
"sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", + "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", + "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", + "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", + "dev": true, + "dependencies": { + "@types/unist": 
"^3.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", + "integrity": "sha512-jRDZ1IMLttGj41KcZvlrYAaI3CfqpLpfpf+Mfig13viT6NKvRzWZ+lXz0Y5D60w6uJIBAOGq9mSHf0gktF0duw==", + "dev": true, + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vite": { + "version": "5.4.10", + "resolved": "https://registry.npmjs.org/vite/-/vite-5.4.10.tgz", + "integrity": "sha512-1hvaPshuPUtxeQ0hsVH3Mud0ZanOLwVTneA1EgbAM5LhaZEqyPWGRQ7BtaMvUrTDeEaC8pxtj6a6jku3x4z6SQ==", + "dev": true, + "dependencies": { + "esbuild": "^0.21.3", + "postcss": "^8.4.43", + "rollup": "^4.20.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vitepress": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/vitepress/-/vitepress-1.4.3.tgz", + "integrity": "sha512-956c2K2Mr0ubY9bTc2lCJD3g0mgo0mARB1iJC/BqUt4s0AM8Wl60wSU4zbFnzV7X2miFK1XJDKzGZnuEN90umw==", + "dev": true, + 
"dependencies": { + "@docsearch/css": "^3.6.2", + "@docsearch/js": "^3.6.2", + "@shikijs/core": "^1.22.2", + "@shikijs/transformers": "^1.22.2", + "@shikijs/types": "^1.22.2", + "@types/markdown-it": "^14.1.2", + "@vitejs/plugin-vue": "^5.1.4", + "@vue/devtools-api": "^7.5.4", + "@vue/shared": "^3.5.12", + "@vueuse/core": "^11.1.0", + "@vueuse/integrations": "^11.1.0", + "focus-trap": "^7.6.0", + "mark.js": "8.11.1", + "minisearch": "^7.1.0", + "shiki": "^1.22.2", + "vite": "^5.4.10", + "vue": "^3.5.12" + }, + "bin": { + "vitepress": "bin/vitepress.js" + }, + "peerDependencies": { + "markdown-it-mathjax3": "^4", + "postcss": "^8" + }, + "peerDependenciesMeta": { + "markdown-it-mathjax3": { + "optional": true + }, + "postcss": { + "optional": true + } + } + }, + "node_modules/vue": { + "version": "3.5.12", + "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.12.tgz", + "integrity": "sha512-CLVZtXtn2ItBIi/zHZ0Sg1Xkb7+PU32bJJ8Bmy7ts3jxXTcbfsEfBivFYYWz1Hur+lalqGAh65Coin0r+HRUfg==", + "dev": true, + "dependencies": { + "@vue/compiler-dom": "3.5.12", + "@vue/compiler-sfc": "3.5.12", + "@vue/runtime-dom": "3.5.12", + "@vue/server-renderer": "3.5.12", + "@vue/shared": "3.5.12" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", + "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", + "dev": true, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + } + } +} diff --git a/lib/docs/package.json b/lib/docs/package.json new file mode 100644 index 00000000..f794a384 --- /dev/null +++ b/lib/docs/package.json @@ -0,0 +1,18 @@ +{ + "name": "lisa-docs", + "private": true, + "version": "1.0.0", + "description": "Documentation of LISA", + "scripts": { + "prebuild": "(cd ../../ && npm run 
generateSchemaDocs)", + "build": "npm run docs:build", + "docs:dev": "vitepress dev .", + "docs:build": "vitepress build .", + "docs:preview": "vitepress preview ." + }, + "author": "", + "license": "Apache-2.0", + "devDependencies": { + "vitepress": "^1.4.3" + } +} diff --git a/lib/docs/public/favicon.ico b/lib/docs/public/favicon.ico new file mode 100644 index 00000000..643489f2 Binary files /dev/null and b/lib/docs/public/favicon.ico differ diff --git a/lib/docs/public/logo-dark.svg b/lib/docs/public/logo-dark.svg new file mode 100644 index 00000000..31321de6 --- /dev/null +++ b/lib/docs/public/logo-dark.svg @@ -0,0 +1 @@ + diff --git a/lib/docs/public/logo-light.svg b/lib/docs/public/logo-light.svg new file mode 100644 index 00000000..af06d08f --- /dev/null +++ b/lib/docs/public/logo-light.svg @@ -0,0 +1 @@ + diff --git a/lib/docs/user/breaking-changes.md b/lib/docs/user/breaking-changes.md new file mode 100644 index 00000000..b71fc184 --- /dev/null +++ b/lib/docs/user/breaking-changes.md @@ -0,0 +1,80 @@ +# Breaking Changes + +## Migrating to v3.2.0 + +With the release of LISA v3.2.0, we have implemented a significant update to the configuration file schema to streamline +the deployment process. The previous single config.yaml file has been deprecated in favor of a more flexible two-file +system: config-base.yaml and config-custom.yaml. + +The config-base.yaml file now contains default properties, which can be selectively overridden using the +config-custom.yaml file. This new structure allows for greater customization while maintaining a standardized base +configuration. + +To facilitate the transition to this new configuration system, we have developed a migration utility. Users can execute +the command `npm run migrate-properties` to automatically convert their existing config.yaml file into the new +config-custom.yaml format. 
+ +This update enhances the overall flexibility and maintainability of LISA configurations, providing a more robust +foundation for future developments and easier customization for end-users. + +## v2 to v3 Migration + +With the release of LISA v3.0.0, we have introduced several architectural changes that are incompatible with previous +versions. Although these changes may cause some friction for existing users, they aim to simplify the deployment +experience and enhance long-term scalability. The following breaking changes are critical for existing users planning to +upgrade: + +1. Model Deletion Upon Upgrade: Models deployed via EC2 and ECS using the config.yaml file’s ecsModels list will be + deleted during the upgrade process. LISA has migrated to a new model deployment system that manages models + internally, rendering the ecsModels list obsolete. We recommend backing up your model settings to facilitate their + redeployment through the new Model Management API with minimal downtime. +1. Networking Changes and Full Teardown: Core networking changes require a complete teardown of the existing LISA + installation using the make destroy command before upgrading. Cross-stack dependencies have been modified, + necessitating this full teardown to ensure proper application of the v3 infrastructure changes. Additionally, users + may need to manually delete some resources, such as ECR repositories or S3 buckets, if they were populated before + CloudFormation began deleting the stack. This operation is destructive and irreversible, so it is crucial to back up + any critical configurations and data (e.g., S3 RAG bucket contents, DynamoDB token tables) before proceeding with the + upgrade. +1. New LiteLLM Admin Key Requirement: The new Model Management API requires an "admin" key for LiteLLM to track models + for inference requests. This key, while transparent to users, must be present and conform to the required format ( + starting with sk-). 
The key is defined in the config.yaml file, and the LISA schema validator will report an error if + it is missing or incorrectly formatted. + +## v3.0.0 to v3.1.0 + +In preparation for the v3.1.0 release, there are several changes that we needed to make in order to ensure the stability +of the LISA system. + +1. The CreateModel API `containerConfig` object has been changed so that the Docker Image repository is listed in + `containerConfig.image.baseImage` instead of + its previous location at `containerConfig.baseImage.baseImage`. This change makes the configuration consistent with + the config.yaml file in LISA v2.0 and prior. +2. The CreateModel API `containerConfig.image` object no longer requires the `path` option. We identified that this was + a confusing and redundant option to set, considering + that the path was based on the LISA code repository structure, and that we already had an option to specify if a + model was using TGI, TEI, or vLLM. Specifying the `inferenceContainer` + is sufficient for the system to infer which files to use so that the user does not have to provide this information. +3. The ApiDeployment stack now follows the same naming convention as the rest of the stacks that we deploy, utilizing + the deployment name and the deploymentStage names. This allows users + to have multiple LISA installations with different parameters in the same account without needing to change region or + account entirely. After successful deployment, you may safely delete the + previous `${deploymentStage}-LisaApiDeployment` stack, as it is no longer in use. +4. If you have installed v3.0.0 or v3.0.1, you will need to **delete** the Models API stack so that the model deployer + function will deploy again. The function was converted to a Docker Image + Function so that the growing Function size would fit within the Lambda constraints. We recommend that you take the + following actions to avoid leaked resources: + 1. 
Use the Model Management UI to **delete all models** from LISA. This is needed so that we delete any + CloudFormation stacks that track GPU instances. Failure to do this will require manual + resource cleanup to rid the account of inaccessible EC2 instances. Once the Models DynamoDB Table is deleted, we + do not have a programmatic way to re-reference deployed models, so that is + why we recommend deleting them first. + 2. **Only after deleting all models through the Model Management UI**, manually delete the Model Management API + stack in CloudFormation. This will take at least 45 minutes due to Lambda's use + of Elastic Network Interfaces for VPC access. The stack name will look like: + `${deployment}-lisa-models-${deploymentStage}`. + 3. After the stack has been deleted, deploy LISA v3.1.0, which will recreate the Models API stack, along with the + Docker Lambda Function. +5. The `ecsModels` section of `config.yaml` has been stripped down to only 3 fields per model: `modelName`, + `inferenceContainer`, and `baseImage`. Just as before, the system will check to see if the models + defined here exist in your models S3 bucket prior to LISA deployment. These values will be needed later when invoking + the Model Management API to create a model. diff --git a/lib/docs/user/chat.md b/lib/docs/user/chat.md new file mode 100644 index 00000000..35425f65 --- /dev/null +++ b/lib/docs/user/chat.md @@ -0,0 +1,175 @@ + +# Chatbot Example + +This repository includes an example chatbot web application. The React-based web application can be optionally deployed to demonstrate the capabilities of LISA Serve. The chatbot consists of a static React-based single page application hosted via API GW S3 proxy integration. The app connects to the LISA Serve REST API and an optional RAG API. The app integrates with an OIDC-compatible IdP and allows users to interact directly with any of the textgen models hosted with LISA Serve. 
If the optional RAG stack is deployed then users can also leverage the embeddings models and AWS OpenSearch or PGVector to demonstrate chat with RAG. Chat sessions are maintained in a DynamoDB table, and a number of parameters are exposed through the UI to allow experimentation, including prompt, temperature, top k, top p, max tokens, and more. + +## Local development + +### Configuring Pre-Commit Hooks + +To ensure code quality and consistency, this project uses pre-commit hooks. These hooks are configured to perform checks, such as linting and formatting, helping to catch potential issues early. These hooks are run automatically on each push to a remote branch but if you wish to run them locally before each commit, follow these steps: + +1. Install pre-commit: `pip install pre-commit` +2. Install the git hook scripts: `pre-commit install` + +The hooks will now run automatically on changed files but if you wish to test them against all files, run the following command: `pre-commit run --all-files`. 
+ +### Run REST API locally + +``` +cd lib/serve/rest-api +pip install -r src/requirements.txt +export AWS_REGION= +export AUTHORITY= +export CLIENT_ID= +export REGISTERED_MODELS_PS_NAME= +export TOKEN_TABLE_NAME="/LISAApiTokenTable" +gunicorn -k uvicorn.workers.UvicornWorker -w 2 -b "0.0.0.0:8080" "src.main:app" +``` + +### Run example chatbot locally + +Create `lib/user-interface/react/public/env.js` file with the following contents: + +``` +window.env = { + AUTHORITY: '', + CLIENT_ID: '', + JWT_GROUPS_PROP: '', + ADMIN_GROUP: '', + CUSTOM_SCOPES:[], + // Alternatively you can set this to be your REST api elb endpoint + RESTAPI_URI: 'http://localhost:8080/', + API_BASE_URL: 'https://${deployment_id}.execute-api.${regional_domain}/${deployment_stage}', + RESTAPI_VERSION: 'v2', + "MODELS": [ + { + "model": "streaming-textgen-model", + "streaming": true, + "modelType": "textgen" + }, + { + "model": "non-streaming-textgen-model", + "streaming": false, + "modelType": "textgen" + }, + { + "model": "embedding-model", + "streaming": null, + "modelType": "embedding" + } + ] +} +``` + +Launch the Chat UI: + +``` +cd lib/user-interface/react/ +npm run dev +``` + +# Usage and Features + +The LISA Serve endpoint can be used independently of the Chat UI, and the following shows a few examples of how to do that. The Serve endpoint +will still validate user auth, so if you have a Bearer token from the IdP configured with LISA, we will honor it, or if you've set up an API +token using the [DynamoDB instructions](/admin/api-tokens), we will also accept that. This diagram shows the LISA Serve +components that +would be utilized during direct REST API requests. + +## OpenAI Specification Compatibility + +We now provide greater support for the [OpenAI specification](https://platform.openai.com/docs/api-reference) for model inference and embeddings. 
+We utilize LiteLLM as a proxy for both models we spin up on behalf of the user and additional models configured through the config.yaml file, and because of that, the +LISA REST API endpoint allows for a central location for making text generation and embeddings requests. We support, and are not limited to, the following popular endpoint +routes as long as your underlying models can also respond to them. + +- /models +- /chat/completions +- /completions +- /embeddings + +By supporting the OpenAI spec, we can more easily allow users to integrate their collection of models into their LLM applications and workflows. In LISA, users can authenticate +using their OpenID Connect Identity Provider, or with an API token created through the DynamoDB token workflow as +described [here](/admin/api-tokens). Once the token +is retrieved, users can use that in direct requests to the LISA Serve REST API. If using the IdP, users must set the 'Authorization' header, otherwise if using the API token, +either the 'Api-Key' header or the 'Authorization' header. After that, requests to `https://${lisa_serve_alb}/v2/serve` will handle the OpenAI API calls. As an example, the following call can list all +models that LISA is aware of, assuming usage of the API token. If you are using a self-signed cert, you must also provide the `--cacert $path` option to specify a CA bundle to trust for SSL verification. + +```shell +curl -s -H 'Api-Key: your-token' -X GET https://${lisa_serve_alb}/v2/serve/models +``` + +If using the IdP, the request would look like the following: + +```shell +curl -s -H 'Authorization: Bearer your-token' -X GET https://${lisa_serve_alb}/v2/serve/models +``` + +When using a library that requests an OpenAI-compatible base_url, you can provide `https://${lisa_serve_alb}/v2/serve` here. All of the OpenAI routes will +automatically be added to the base URL, just as we appended `/models` to the `/v2/serve` route for listing all models tracked by LISA. 
+ + +## Continue JetBrains and VS Code Plugin + +For developers that desire an LLM assistant to help with programming tasks, we support adding LISA as an LLM provider for the [Continue plugin](https://www.continue.dev). +To add LISA as a provider, open up the Continue plugin's `config.json` file and locate the `models` list. In this list, add the following block, replacing the placeholder URL +with your own REST API domain or ALB. The `/v2/serve` is required at the end of the `apiBase`. This configuration +requires an API token as created through the [DynamoDB workflow](/admin/api-tokens). + +```json +{ + "model": "AUTODETECT", + "title": "LISA", + "apiBase": "https:///v2/serve", + "provider": "openai", + "apiKey": "your-api-token" // pragma: allowlist-secret +} +``` + +Once you save the `config.json` file, the Continue plugin will call the `/models` API to get a list of models at your disposal. The ones provided by LISA will be prefaced +with "LISA" or with the string you place in the `title` field of the config above. Once the configuration is complete and a model is selected, you can use that model to +generate code and perform AI assistant tasks within your development environment. See the [Continue documentation](https://docs.continue.dev/how-to-use-continue) for more +information about its features, capabilities, and usage. + +### Usage in LLM Libraries + +If your workflow includes using libraries, such as [LangChain](https://python.langchain.com/v0.2/docs/introduction/) or [OpenAI](https://github.com/openai/openai-python), +then you can place LISA right in your application by changing only the endpoint and headers for the client objects. As an example, using the OpenAI library, the client would +normally be instantiated and invoked with the following block. 
+ +```python +from openai import OpenAI + +client = OpenAI( + api_key="my_key" # pragma: allowlist-secret not a real key +) +client.models.list() +``` + +To use the models being served by LISA, the client needs only a few changes: + +1. Specify the `base_url` as the LISA Serve ALB, using the /v2/serve route at the end, similar to the apiBase in the [Continue example](#continue-jetbrains-and-vs-code-plugin) +2. Add the API key that you generated from the [token generation steps](/admin/api-tokens) as your `api_key` field. +3. If using a self-signed cert, you must provide a certificate path for validating SSL. If you're using an ACM or public cert, then this may be omitted. +1. We provide a convenience function in the `lisa-sdk` for generating a cert path from an IAM certificate ARN if one is provided in the `RESTAPI_SSL_CERT_ARN` environment variable. + +The Code block will now look like this and you can continue to use the library without any other modifications. + +```python +# for self-signed certificates +import boto3 +from lisapy.utils import get_cert_path +# main client library +from openai import DefaultHttpxClient, OpenAI + +iam_client = boto3.client("iam") +cert_path = get_cert_path(iam_client) + +client = OpenAI( + api_key="my_key", # pragma: allowlist-secret not a real key + base_url="https:///v2/serve", + http_client=DefaultHttpxClient(verify=cert_path), # needed for self-signed certs on your ALB, can be omitted otherwise +) +client.models.list() +``` diff --git a/lib/docs/user/context-windows.md b/lib/docs/user/context-windows.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/context-windows.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/history.md b/lib/docs/user/history.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/history.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/model-kwargs.md b/lib/docs/user/model-kwargs.md new file mode 100644 index 00000000..46409041 --- 
/dev/null +++ b/lib/docs/user/model-kwargs.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/model-management-ui.md b/lib/docs/user/model-management-ui.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/model-management-ui.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/models.md b/lib/docs/user/models.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/models.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/nonrag-management.md b/lib/docs/user/nonrag-management.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/nonrag-management.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/prompt-engineering.md b/lib/docs/user/prompt-engineering.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/prompt-engineering.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/docs/user/rag.md b/lib/docs/user/rag.md new file mode 100644 index 00000000..46409041 --- /dev/null +++ b/lib/docs/user/rag.md @@ -0,0 +1 @@ +# TODO diff --git a/lib/models/docker-image-builder.ts b/lib/models/docker-image-builder.ts index 664843d6..98c05bd9 100644 --- a/lib/models/docker-image-builder.ts +++ b/lib/models/docker-image-builder.ts @@ -15,18 +15,28 @@ */ import { Construct } from 'constructs'; -import { Code, Function } from 'aws-cdk-lib/aws-lambda'; -import { Role, InstanceProfile, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { Code, Function, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { + Role, + InstanceProfile, + ServicePrincipal, + ManagedPolicy, + Policy, + PolicyStatement +} from 'aws-cdk-lib/aws-iam'; import { Stack, Duration } from 'aws-cdk-lib'; import { Bucket } from 'aws-cdk-lib/aws-s3'; import { BucketDeployment, Source } from 'aws-cdk-lib/aws-s3-deployment'; import { createCdkId } from '../core/utils'; import { BaseProps } from '../schema'; +import { Vpc } from '../networking/vpc'; +import { Queue } from 
'aws-cdk-lib/aws-sqs'; export type DockerImageBuilderProps = BaseProps & { ecrUri: string; mountS3DebUrl: string; + vpc?: Vpc; }; export class DockerImageBuilder extends Construct { @@ -88,7 +98,13 @@ export class DockerImageBuilder extends Construct { new PolicyStatement({ actions: [ 'ec2:RunInstances', - 'ec2:CreateTags' + 'ec2:CreateTags', + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' ], resources: ['*'] }), @@ -114,19 +130,27 @@ export class DockerImageBuilder extends Construct { const functionId = createCdkId([stackName, 'docker-image-builder']); this.dockerImageBuilderFn = new Function(this, functionId, { + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'docker-image-builderDLQ', { + queueName: 'docker-image-builderDLQ', + enforceSSL: true, + }), functionName: functionId, - runtime: props.config.lambdaConfig.pythonRuntime, + runtime: Runtime.PYTHON_3_10, handler: 'dockerimagebuilder.handler', code: Code.fromAsset('./lambda/'), timeout: Duration.minutes(1), memorySize: 1024, + reservedConcurrentExecutions: 10, role: role, environment: { 'LISA_DOCKER_BUCKET': ec2DockerBucket.bucketName, 'LISA_ECR_URI': props.ecrUri, 'LISA_INSTANCE_PROFILE': ec2InstanceProfile.instanceProfileArn, 'LISA_MOUNTS3_DEB_URL': props.mountS3DebUrl - } + }, + vpc: props.vpc?.subnetSelection ? 
props.vpc?.vpc : undefined, + vpcSubnets: props.vpc?.subnetSelection, }); } diff --git a/lib/models/ecs-model-deployer.ts b/lib/models/ecs-model-deployer.ts index 9c64949d..27a0866f 100644 --- a/lib/models/ecs-model-deployer.ts +++ b/lib/models/ecs-model-deployer.ts @@ -16,16 +16,17 @@ import { Construct } from 'constructs'; import { DockerImageCode, DockerImageFunction, IFunction } from 'aws-cdk-lib/aws-lambda'; -import { Role, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { Role, ServicePrincipal, ManagedPolicy, Policy, PolicyStatement, Effect } from 'aws-cdk-lib/aws-iam'; import { Stack, Duration, Size } from 'aws-cdk-lib'; import { createCdkId } from '../core/utils'; import { BaseProps, Config } from '../schema'; +import { Vpc } from '../networking/vpc'; export type ECSModelDeployerProps = { - vpcId: string; securityGroupId: string; config: Config; + vpc: Vpc; } & BaseProps; export class ECSModelDeployer extends Construct { @@ -42,6 +43,18 @@ export class ECSModelDeployer extends Construct { new PolicyStatement({ actions: ['sts:AssumeRole'], resources: ['arn:*:iam::*:role/cdk-*'] + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' + ], + resources: ['*'], }) ] }); @@ -57,7 +70,8 @@ export class ECSModelDeployer extends Construct { 'removalPolicy': props.config.removalPolicy, 's3BucketModels': props.config.s3BucketModels, 'mountS3DebUrl': props.config.mountS3DebUrl, - 'permissionsBoundaryAspect': props.config.permissionsBoundaryAspect + 'permissionsBoundaryAspect': props.config.permissionsBoundaryAspect, + 'subnets': props.config.subnets }; const functionId = createCdkId([stackName, 'ecs_model_deployer']); @@ -69,10 +83,12 @@ export class ECSModelDeployer extends Construct { memorySize: 1024, role: role, 
environment: { - 'LISA_VPC_ID': props.vpcId, + 'LISA_VPC_ID': props.vpc?.vpc.vpcId, 'LISA_SECURITY_GROUP_ID': props.securityGroupId, 'LISA_CONFIG': JSON.stringify(stripped_config) - } + }, + vpc: props.vpc?.subnetSelection ? props.vpc?.vpc : undefined, + vpcSubnets: props.vpc?.subnetSelection, }); } } diff --git a/lib/models/model-api.ts b/lib/models/model-api.ts index e18afc5c..cdb2224e 100644 --- a/lib/models/model-api.ts +++ b/lib/models/model-api.ts @@ -28,7 +28,7 @@ import { Role, ServicePrincipal, } from 'aws-cdk-lib/aws-iam'; -import { LayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { StringParameter } from 'aws-cdk-lib/aws-ssm'; import { Construct } from 'constructs'; @@ -114,14 +114,15 @@ export class ModelsApi extends Construct { const ecsModelDeployer = new ECSModelDeployer(this, 'ecs-model-deployer', { securityGroupId: vpc.securityGroups.ecsModelAlbSg.securityGroupId, - vpcId: vpc.vpc.vpcId, - config: config + config: config, + vpc: vpc }); const dockerImageBuilder = new DockerImageBuilder(this, 'docker-image-builder', { ecrUri: ecsModelBuildRepo.repositoryUri, mountS3DebUrl: config.mountS3DebUrl!, - config: config + config: config, + vpc }); const managementKeyName = StringParameter.valueForStringParameter(this, `${config.deploymentPrefix}/managementKeySecretName`); @@ -175,6 +176,18 @@ export class ModelsApi extends Construct { ], resources: ['*'] }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ec2:CreateNetworkInterface', + 'ec2:DescribeNetworkInterfaces', + 'ec2:DescribeSubnets', + 'ec2:DeleteNetworkInterface', + 'ec2:AssignPrivateIpAddresses', + 'ec2:UnassignPrivateIpAddresses' + ], + resources: ['*'], + }), new PolicyStatement({ effect: Effect.ALLOW, actions: [ @@ -219,7 +232,7 @@ export class ModelsApi extends Construct { modelTable: modelTable, lambdaLayers: [commonLambdaLayer, fastapiLambdaLayer], role: stateMachinesLambdaRole, - vpc: vpc.vpc, + vpc: vpc, 
securityGroups: securityGroups, dockerImageBuilderFnArn: dockerImageBuilder.dockerImageBuilderFn.functionArn, ecsModelDeployerFnArn: ecsModelDeployer.ecsModelDeployerFn.functionArn, @@ -233,7 +246,7 @@ export class ModelsApi extends Construct { modelTable: modelTable, lambdaLayers: [commonLambdaLayer, fastapiLambdaLayer], role: stateMachinesLambdaRole, - vpc: vpc.vpc, + vpc: vpc, securityGroups: securityGroups, restApiContainerEndpointPs: lisaServeEndpointUrlPs, managementKeyName: managementKeyName, @@ -244,7 +257,7 @@ export class ModelsApi extends Construct { modelTable: modelTable, lambdaLayers: [commonLambdaLayer, fastapiLambdaLayer], role: stateMachinesLambdaRole, - vpc: vpc.vpc, + vpc: vpc, securityGroups: securityGroups, restApiContainerEndpointPs: lisaServeEndpointUrlPs, managementKeyName: managementKeyName, @@ -252,8 +265,8 @@ export class ModelsApi extends Construct { const environment = { LISA_API_URL_PS_NAME: lisaServeEndpointUrlPs.parameterName, - REST_API_VERSION: config.restApiConfig.apiVersion, - RESTAPI_SSL_CERT_ARN: config.restApiConfig.loadBalancerConfig.sslCertIamArn ?? '', + REST_API_VERSION: 'v2', + RESTAPI_SSL_CERT_ARN: config.restApiConfig?.sslCertIamArn ?? 
'', CREATE_SFN_ARN: createModelStateMachine.stateMachineArn, DELETE_SFN_ARN: deleteModelStateMachine.stateMachineArn, UPDATE_SFN_ARN: updateModelStateMachine.stateMachineArn, @@ -266,7 +279,7 @@ export class ModelsApi extends Construct { this, restApi, authorizer, - config.lambdaSourcePath, + './lambda', [commonLambdaLayer, fastapiLambdaLayer], { name: 'handler', @@ -276,19 +289,19 @@ export class ModelsApi extends Construct { method: 'ANY', environment }, - config.lambdaConfig.pythonRuntime, + Runtime.PYTHON_3_10, lambdaRole, - vpc.vpc, + vpc, securityGroups, ); lisaServeEndpointUrlPs.grantRead(lambdaFunction.role!); - if (config.restApiConfig.loadBalancerConfig.sslCertIamArn) { + if (config.restApiConfig?.sslCertIamArn) { const certPerms = new Policy(this, 'ModelsApiCertPerms', { statements: [ new PolicyStatement({ actions: ['iam:GetServerCertificate'], - resources: [config.restApiConfig.loadBalancerConfig.sslCertIamArn], + resources: [config.restApiConfig?.sslCertIamArn], effect: Effect.ALLOW, }) ] @@ -345,12 +358,12 @@ export class ModelsApi extends Construct { this, restApi, authorizer, - config.lambdaSourcePath, + './lambda', [commonLambdaLayer], f, - config.lambdaConfig.pythonRuntime, + Runtime.PYTHON_3_10, lambdaRole, - vpc.vpc, + vpc, securityGroups, ); }); diff --git a/lib/models/state-machine/create-model.ts b/lib/models/state-machine/create-model.ts index 946553f4..d76ec044 100644 --- a/lib/models/state-machine/create-model.ts +++ b/lib/models/state-machine/create-model.ts @@ -27,13 +27,15 @@ import { Construct } from 'constructs'; import { Duration } from 'aws-cdk-lib'; import { BaseProps } from '../../schema'; import { ITable } from 'aws-cdk-lib/aws-dynamodb'; -import { Code, Function, ILayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Code, Function, ILayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { IRole } from 'aws-cdk-lib/aws-iam'; import { LAMBDA_MEMORY, LAMBDA_TIMEOUT, OUTPUT_PATH, POLLING_TIMEOUT } from './constants'; 
-import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { LambdaInvoke } from 'aws-cdk-lib/aws-stepfunctions-tasks'; import { Repository } from 'aws-cdk-lib/aws-ecr'; import { IStringParameter } from 'aws-cdk-lib/aws-ssm'; +import { Vpc } from '../../networking/vpc'; +import { Queue } from 'aws-cdk-lib/aws-sqs'; type CreateModelStateMachineProps = BaseProps & { modelTable: ITable, @@ -42,7 +44,7 @@ type CreateModelStateMachineProps = BaseProps & { ecsModelDeployerFnArn: string; ecsModelImageRepository: Repository; role?: IRole, - vpc?: IVpc, + vpc?: Vpc, securityGroups?: ISecurityGroup[]; restApiContainerEndpointPs: IStringParameter; managementKeyName: string @@ -66,20 +68,27 @@ export class CreateModelStateMachine extends Construct { ECS_MODEL_DEPLOYER_FN_ARN: ecsModelDeployerFnArn, LISA_API_URL_PS_NAME: restApiContainerEndpointPs.parameterName, MODEL_TABLE_NAME: modelTable.tableName, - REST_API_VERSION: config.restApiConfig.apiVersion, + REST_API_VERSION: 'v2', MANAGEMENT_KEY_NAME: managementKeyName, - RESTAPI_SSL_CERT_ARN: config.restApiConfig.loadBalancerConfig.sslCertIamArn ?? '', + RESTAPI_SSL_CERT_ARN: config.restApiConfig?.sslCertIamArn ?? 
'', }; const setModelToCreating = new LambdaInvoke(this, 'SetModelToCreating', { lambdaFunction: new Function(this, 'SetModelToCreatingFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'SetModelToCreatingDLQ', { + queueName: 'SetModelToCreatingDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_set_model_to_creating', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -91,13 +100,20 @@ export class CreateModelStateMachine extends Construct { const startCopyDockerImage = new LambdaInvoke(this, 'StartCopyDockerImage', { lambdaFunction: new Function(this, 'StartCopyDockerImageFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'StartCopyDockerImageDLQ', { + queueName: 'StartCopyDockerImageDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_start_copy_docker_image', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -107,13 +123,20 @@ export class CreateModelStateMachine extends Construct { const pollDockerImageAvailable = new LambdaInvoke(this, 'PollDockerImageAvailable', { lambdaFunction: new Function(this, 'PollDockerImageAvailableFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 
'PollDockerImageAvailableDLQ', { + queueName: 'PollDockerImageAvailableDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_poll_docker_image_available', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -123,13 +146,20 @@ export class CreateModelStateMachine extends Construct { const handleFailureState = new LambdaInvoke(this, 'HandleFailure', { lambdaFunction: new Function(this, 'HandleFailureFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'HandleFailureDLQ', { + queueName: 'HandleFailureDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_failure', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -145,13 +175,20 @@ export class CreateModelStateMachine extends Construct { const startCreateStack = new LambdaInvoke(this, 'StartCreateStack', { lambdaFunction: new Function(this, 'StartCreateStackFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'StartCreateStackDLQ', { + queueName: 'StartCreateStackDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_start_create_stack', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: Duration.minutes(8), 
memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -161,13 +198,20 @@ export class CreateModelStateMachine extends Construct { const pollCreateStack = new LambdaInvoke(this, 'PollCreateStack', { lambdaFunction: new Function(this, 'PollCreateStackFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'PollCreateStackDLQ', { + queueName: 'PollCreateStackDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_poll_create_stack', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -183,13 +227,20 @@ export class CreateModelStateMachine extends Construct { const addModelToLitellm = new LambdaInvoke(this, 'AddModelToLitellm', { lambdaFunction: new Function(this, 'AddModelToLitellmFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'AddModelToLitellmDLQ', { + queueName: 'AddModelToLitellmDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.create_model.handle_add_model_to_litellm', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, diff --git a/lib/models/state-machine/delete-model.ts 
b/lib/models/state-machine/delete-model.ts index 3016a08b..29b2389e 100644 --- a/lib/models/state-machine/delete-model.ts +++ b/lib/models/state-machine/delete-model.ts @@ -25,19 +25,21 @@ import { Succeed, Wait, } from 'aws-cdk-lib/aws-stepfunctions'; -import { Code, Function, ILayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Code, Function, ILayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { BaseProps } from '../../schema'; import { IRole } from 'aws-cdk-lib/aws-iam'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { ITable } from 'aws-cdk-lib/aws-dynamodb'; import { LAMBDA_MEMORY, LAMBDA_TIMEOUT, OUTPUT_PATH, POLLING_TIMEOUT } from './constants'; import { IStringParameter } from 'aws-cdk-lib/aws-ssm'; +import { Vpc } from '../../networking/vpc'; +import { Queue } from 'aws-cdk-lib/aws-sqs'; type DeleteModelStateMachineProps = BaseProps & { modelTable: ITable, lambdaLayers: ILayerVersion[], role?: IRole, - vpc?: IVpc, + vpc?: Vpc, securityGroups?: ISecurityGroup[]; restApiContainerEndpointPs: IStringParameter; managementKeyName: string; @@ -58,22 +60,29 @@ export class DeleteModelStateMachine extends Construct { const environment = { // Environment variables to set in all Lambda functions MODEL_TABLE_NAME: modelTable.tableName, LISA_API_URL_PS_NAME: restApiContainerEndpointPs.parameterName, - REST_API_VERSION: config.restApiConfig.apiVersion, + REST_API_VERSION: 'v2', MANAGEMENT_KEY_NAME: managementKeyName, - RESTAPI_SSL_CERT_ARN: config.restApiConfig.loadBalancerConfig.sslCertIamArn ?? '', + RESTAPI_SSL_CERT_ARN: config.restApiConfig?.sslCertIamArn ?? '', }; // Needs to return if model has a stack to delete or if it is only in LiteLLM. Updates model state to DELETING. // Input payload to state machine contains the model name that we want to delete. 
const setModelToDeleting = new LambdaInvoke(this, 'SetModelToDeleting', { lambdaFunction: new Function(this, 'SetModelToDeletingFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'SetModelToDeletingDLQ', { + queueName: 'SetModelToDeletingDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.delete_model.handle_set_model_to_deleting', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -83,13 +92,20 @@ export class DeleteModelStateMachine extends Construct { const deleteFromLitellm = new LambdaInvoke(this, 'DeleteFromLitellm', { lambdaFunction: new Function(this, 'DeleteFromLitellmFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'DeleteFromLitellmDLQ', { + queueName: 'DeleteFromLitellmDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.delete_model.handle_delete_from_litellm', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -99,13 +115,20 @@ export class DeleteModelStateMachine extends Construct { const deleteStack = new LambdaInvoke(this, 'DeleteStack', { lambdaFunction: new Function(this, 'DeleteStackFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'DeleteStackDLQ', { + queueName: 'DeleteStackDLQ', + enforceSSL: 
true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.delete_model.handle_delete_stack', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -115,13 +138,20 @@ export class DeleteModelStateMachine extends Construct { const monitorDeleteStack = new LambdaInvoke(this, 'MonitorDeleteStack', { lambdaFunction: new Function(this, 'MonitorDeleteStackFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'MonitorDeleteStackDLQ', { + queueName: 'MonitorDeleteStackDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.delete_model.handle_monitor_delete_stack', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -131,13 +161,20 @@ export class DeleteModelStateMachine extends Construct { const deleteFromDdb = new LambdaInvoke(this, 'DeleteFromDdb', { lambdaFunction: new Function(this, 'DeleteFromDdbFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'DeleteFromDdbDLQ', { + queueName: 'DeleteFromDdbDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.delete_model.handle_delete_from_ddb', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: 
vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, diff --git a/lib/models/state-machine/update-model.ts b/lib/models/state-machine/update-model.ts index 87fc89a4..aa59e989 100644 --- a/lib/models/state-machine/update-model.ts +++ b/lib/models/state-machine/update-model.ts @@ -17,21 +17,23 @@ import { BaseProps } from '../../schema'; import { ITable } from 'aws-cdk-lib/aws-dynamodb'; -import { Code, Function, ILayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Code, Function, ILayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { IRole } from 'aws-cdk-lib/aws-iam'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { IStringParameter } from 'aws-cdk-lib/aws-ssm'; import { Construct } from 'constructs'; import { LambdaInvoke } from 'aws-cdk-lib/aws-stepfunctions-tasks'; import { LAMBDA_MEMORY, LAMBDA_TIMEOUT, OUTPUT_PATH, POLLING_TIMEOUT } from './constants'; import { Choice, Condition, DefinitionBody, StateMachine, Succeed, Wait, WaitTime } from 'aws-cdk-lib/aws-stepfunctions'; +import { Vpc } from '../../networking/vpc'; +import { Queue } from 'aws-cdk-lib/aws-sqs'; type UpdateModelStateMachineProps = BaseProps & { modelTable: ITable, lambdaLayers: ILayerVersion[], role?: IRole, - vpc?: IVpc, + vpc?: Vpc, securityGroups?: ISecurityGroup[]; restApiContainerEndpointPs: IStringParameter; managementKeyName: string; @@ -61,20 +63,27 @@ export class UpdateModelStateMachine extends Construct { const environment = { // Environment variables to set in all Lambda functions MODEL_TABLE_NAME: modelTable.tableName, LISA_API_URL_PS_NAME: restApiContainerEndpointPs.parameterName, - REST_API_VERSION: config.restApiConfig.apiVersion, + REST_API_VERSION: 'v2', MANAGEMENT_KEY_NAME: managementKeyName, - RESTAPI_SSL_CERT_ARN: config.restApiConfig.loadBalancerConfig.sslCertIamArn ?? 
'', + RESTAPI_SSL_CERT_ARN: config.restApiConfig?.sslCertIamArn ?? '', }; const handleJobIntake = new LambdaInvoke(this, 'HandleJobIntake', { lambdaFunction: new Function(this, 'HandleJobIntakeFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'HandleJobIntakeDLQ', { + queueName: 'HandleJobIntakeDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.update_model.handle_job_intake', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -84,13 +93,20 @@ export class UpdateModelStateMachine extends Construct { const handlePollCapacity = new LambdaInvoke(this, 'HandlePollCapacity', { lambdaFunction: new Function(this, 'HandlePollCapacityFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 'HandlePollCapacityDLQ', { + queueName: 'HandlePollCapacityDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.update_model.handle_poll_capacity', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, @@ -100,13 +116,20 @@ export class UpdateModelStateMachine extends Construct { const handleFinishUpdate = new LambdaInvoke(this, 'HandleFinishUpdate', { lambdaFunction: new Function(this, 'HandleFinishUpdateFunc', { - runtime: config.lambdaConfig.pythonRuntime, + deadLetterQueueEnabled: true, + deadLetterQueue: new Queue(this, 
'HandleFinishUpdateDLQ', { + queueName: 'HandleFinishUpdateDLQ', + enforceSSL: true, + }), + runtime: Runtime.PYTHON_3_10, handler: 'models.state_machine.update_model.handle_finish_update', - code: Code.fromAsset(config.lambdaSourcePath), + code: Code.fromAsset('./lambda'), timeout: LAMBDA_TIMEOUT, memorySize: LAMBDA_MEMORY, + reservedConcurrentExecutions: 5, role: role, - vpc: vpc, + vpc: vpc?.vpc, + vpcSubnets: vpc?.subnetSelection, securityGroups: securityGroups, layers: lambdaLayers, environment: environment, diff --git a/lib/networking/vpc/index.ts b/lib/networking/vpc/index.ts index d9a767ad..89b608ce 100644 --- a/lib/networking/vpc/index.ts +++ b/lib/networking/vpc/index.ts @@ -26,11 +26,13 @@ import { Port, SecurityGroup, SubnetType, + Subnet, SubnetSelection } from 'aws-cdk-lib/aws-ec2'; import { Construct } from 'constructs'; import { createCdkId } from '../../core/utils'; import { SecurityGroups, BaseProps } from '../../schema'; +import { SubnetGroup } from 'aws-cdk-lib/aws-rds'; type VpcProps = {} & BaseProps; @@ -44,6 +46,12 @@ export class Vpc extends Construct { /** Security groups for application. */ public readonly securityGroups: SecurityGroups; + /** Created from deployment configured Subnets for application. */ + public readonly subnetGroup?: SubnetGroup; + + /** Imported Subnets for application. */ + public readonly subnetSelection?: SubnetSelection; + /** * @param {Construct} scope - The parent or owner of the construct. * @param {string} id - The unique identifier for the construct within its scope. @@ -54,9 +62,28 @@ export class Vpc extends Construct { let vpc: IVpc; if (config.vpcId) { + // Imports VPC for use by application if supplied, else creates a VPC. vpc = ec2Vpc.fromLookup(this, 'imported-vpc', { vpcId: config.vpcId, }); + + // Checks if SubnetIds are provided in the config, if so we import them for use. + // A VPC must be supplied if Subnets are being used. 
+ if (config.subnets && config.subnets.length > 0) { + this.subnetSelection = { + subnets: props.config.subnets?.map((subnet, index) => Subnet.fromSubnetId(this, index.toString(), subnet.subnetId)) + }; + + this.subnetGroup = new SubnetGroup( + this, + createCdkId([config.deploymentName, 'Imported-Subnets']), + { + vpc: vpc, + description: 'This SubnetGroup is made up of imported Subnets via the deployment config', + vpcSubnets: this.subnetSelection, + } + ); + } } else { // Create VPC vpc = new ec2Vpc(this, 'VPC', { @@ -118,7 +145,7 @@ export class Vpc extends Construct { // All HTTP VPC traffic -> ECS model ALB ecsModelAlbSg.addIngressRule(Peer.ipv4(vpc.vpcCidrBlock), Port.tcp(80), 'Allow VPC traffic on port 80'); - if (config.restApiConfig.loadBalancerConfig.sslCertIamArn) { + if (config.restApiConfig?.sslCertIamArn) { // All HTTPS IPV4 traffic -> REST API ALB restApiAlbSg.addIngressRule(Peer.anyIpv4(), Port.tcp(443), 'Allow any traffic on port 443'); } else { diff --git a/lib/rag/api/repository.ts b/lib/rag/api/repository.ts index 3b1553e5..06cae2f9 100644 --- a/lib/rag/api/repository.ts +++ b/lib/rag/api/repository.ts @@ -16,13 +16,14 @@ import { Duration } from 'aws-cdk-lib'; import { IAuthorizer, RestApi } from 'aws-cdk-lib/aws-apigateway'; -import { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2'; +import { ISecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { IRole } from 'aws-cdk-lib/aws-iam'; -import { ILayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { ILayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { Construct } from 'constructs'; import { PythonLambdaFunction, registerAPIEndpoint } from '../../api-base/utils'; import { BaseProps } from '../../schema'; +import { Vpc } from '../../networking/vpc'; /** * Properties for RepositoryAPI Construct. 
@@ -44,7 +45,7 @@ type RepositoryApiProps = { restApiId: string; rootResourceId: string; securityGroups?: ISecurityGroup[]; - vpc?: IVpc; + vpc?: Vpc; } & BaseProps; /** @@ -57,7 +58,6 @@ export class RepositoryApi extends Construct { const { authorizer, baseEnvironment, - config, commonLayers, lambdaExecutionRole, restApiId, @@ -130,10 +130,10 @@ export class RepositoryApi extends Construct { this, restApi, authorizer, - config.lambdaSourcePath, + './lambda', commonLayers, f, - config.lambdaConfig.pythonRuntime, + Runtime.PYTHON_3_10, lambdaExecutionRole, vpc, securityGroups, diff --git a/lib/rag/index.ts b/lib/rag/index.ts index c5baef5d..6990d6bd 100644 --- a/lib/rag/index.ts +++ b/lib/rag/index.ts @@ -24,7 +24,7 @@ import { CfnOutput, RemovalPolicy, Stack, StackProps } from 'aws-cdk-lib'; import { IAuthorizer } from 'aws-cdk-lib/aws-apigateway'; import { ISecurityGroup, Peer, Port, SecurityGroup } from 'aws-cdk-lib/aws-ec2'; import { AnyPrincipal, CfnServiceLinkedRole, Effect, PolicyStatement, Role } from 'aws-cdk-lib/aws-iam'; -import { Code, LayerVersion } from 'aws-cdk-lib/aws-lambda'; +import { Code, LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda'; import { Domain, EngineVersion, IDomain } from 'aws-cdk-lib/aws-opensearchservice'; import { Credentials, DatabaseInstance, DatabaseInstanceEngine } from 'aws-cdk-lib/aws-rds'; import { Bucket, HttpMethods } from 'aws-cdk-lib/aws-s3'; @@ -79,9 +79,9 @@ export class LisaRagStack extends Stack { StringParameter.valueForStringParameter(this, `${config.deploymentPrefix}/layerVersion/common`), ); - const bucketName = `${config.deploymentName}-lisaragdocs-${config.accountNumber}`.toLowerCase(); const bucket = new Bucket(this, createCdkId(['LISA', 'RAG', config.deploymentName, config.deploymentStage]), { - bucketName, + removalPolicy: config.removalPolicy, + autoDeleteObjects: config.removalPolicy === RemovalPolicy.DESTROY, cors: [ { allowedMethods: [HttpMethods.GET, HttpMethods.POST], @@ -94,16 +94,16 @@ export 
class LisaRagStack extends Stack { const baseEnvironment: Record = { REGISTERED_MODELS_PS_NAME: modelsPs.parameterName, - BUCKET_NAME: bucketName, + BUCKET_NAME: bucket.bucketName, CHUNK_SIZE: config.ragFileProcessingConfig!.chunkSize.toString(), CHUNK_OVERLAP: config.ragFileProcessingConfig!.chunkOverlap.toString(), LISA_API_URL_PS_NAME: endpointUrl.parameterName, - REST_API_VERSION: config.restApiConfig.apiVersion, + REST_API_VERSION: 'v2', }; // Add REST API SSL Cert ARN if it exists to be used to verify SSL calls to REST API - if (config.restApiConfig.loadBalancerConfig.sslCertIamArn) { - baseEnvironment['RESTAPI_SSL_CERT_ARN'] = config.restApiConfig.loadBalancerConfig.sslCertIamArn; + if (config.restApiConfig?.sslCertIamArn) { + baseEnvironment['RESTAPI_SSL_CERT_ARN'] = config.restApiConfig?.sslCertIamArn; } const lambdaRole = Role.fromRoleArn( @@ -128,11 +128,12 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG OpenSearch domain', }); // Allow communication from private subnets to ECS cluster - vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets).forEach((subnet) => { + const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { openSearchSg.connections.allowFrom( - Peer.ipv4(subnet.ipv4CidrBlock), - Port.tcp(443), - 'Allow private subnets to communicate with OpenSearch cluster', + Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(443), + 'Allow private subnets to communicate with OpenSearch cluster', ); }); new CfnOutput(this, 'openSearchSg', { value: openSearchSg.securityGroupId }); @@ -172,6 +173,7 @@ export class LisaRagStack extends Stack { version: EngineVersion.OPENSEARCH_2_9, enableVersionUpgrade: true, vpc: vpc.vpc, + vpcSubnets: vpc.subnetSelection ? 
[vpc.subnetSelection] : [], ebs: { enabled: true, volumeSize: ragConfig.opensearchConfig.volumeSize, @@ -249,11 +251,12 @@ export class LisaRagStack extends Stack { description: 'Security group for RAG PGVector database', }); - vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets).forEach((subnet) => { + const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { pgvectorSg.connections.allowFrom( - Peer.ipv4(subnet.ipv4CidrBlock), - Port.tcp(ragConfig.rdsConfig?.dbPort || 5432), - 'Allow private subnets to communicate with PGVector database', + Peer.ipv4(config.subnets ? config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(ragConfig.rdsConfig?.dbPort || 5432), + 'Allow private subnets to communicate with PGVector database', ); }); @@ -262,6 +265,7 @@ export class LisaRagStack extends Stack { const pgvector_db = new DatabaseInstance(this, 'PGVectorDB', { engine: DatabaseInstanceEngine.POSTGRES, vpc: vpc.vpc, + subnetGroup: vpc.subnetGroup, credentials: dbCreds, securityGroups: [pgvectorSg!], removalPolicy: RemovalPolicy.DESTROY, @@ -309,14 +313,14 @@ export class LisaRagStack extends Stack { if (config.lambdaLayerAssets?.sdkLayerPath) { sdkLayer = new LayerVersion(this, 'SdkLayer', { code: Code.fromAsset(config.lambdaLayerAssets?.sdkLayerPath), - compatibleRuntimes: [config.lambdaConfig.pythonRuntime], + compatibleRuntimes: [Runtime.PYTHON_3_10], removalPolicy: config.removalPolicy, description: 'LISA SDK common layer', }); } else { sdkLayer = new PythonLayerVersion(this, 'SdkLayer', { entry: SDK_PATH, - compatibleRuntimes: [config.lambdaConfig.pythonRuntime], + compatibleRuntimes: [Runtime.PYTHON_3_10], removalPolicy: config.removalPolicy, description: 'LISA SDK common layer', }); @@ -327,7 +331,7 @@ export class LisaRagStack extends Stack { authorizer, 
baseEnvironment, config, - vpc: vpc.vpc, + vpc: vpc, commonLayers: [commonLambdaLayer, ragLambdaLayer.layer, sdkLayer], restApiId, rootResourceId, diff --git a/lib/schema.ts b/lib/schema.ts index 40178c44..bcd90692 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -1,18 +1,18 @@ /** - Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"). - You may not use this file except in compliance with the License. - You may obtain a copy of the License at + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ // Models for schema validation. 
import * as fs from 'fs'; @@ -21,43 +21,17 @@ import * as path from 'path'; import * as cdk from 'aws-cdk-lib'; import * as ec2 from 'aws-cdk-lib/aws-ec2'; import { AmiHardwareType } from 'aws-cdk-lib/aws-ecs'; -import * as lambda from 'aws-cdk-lib/aws-lambda'; import { z } from 'zod'; const HERE: string = path.resolve(__dirname); const VERSION_PATH: string = path.resolve(HERE, '..', 'VERSION'); const VERSION: string = fs.readFileSync(VERSION_PATH, 'utf8').trim(); -const PYTHON_VERSIONS: Record = { - PYTHON_3_8: lambda.Runtime.PYTHON_3_8, - PYTHON_3_9: lambda.Runtime.PYTHON_3_9, - PYTHON_3_10: lambda.Runtime.PYTHON_3_10, - PYTHON_3_11: lambda.Runtime.PYTHON_3_11, -}; const REMOVAL_POLICIES: Record = { destroy: cdk.RemovalPolicy.DESTROY, retain: cdk.RemovalPolicy.RETAIN, }; -/** - * Configuration schema for Lambda. - */ -const lambdaConfigSchema = z.object({ - pythonRuntime: z - .union([z.literal('PYTHON_3_8'), z.literal('PYTHON_3_9'), z.literal('PYTHON_3_10'), z.literal('PYTHON_3_11')]) - .default('PYTHON_3_9') - .transform((value) => PYTHON_VERSIONS[value]), - logLevel: z.union([z.literal('DEBUG'), z.literal('INFO'), z.literal('WARNING'), z.literal('ERROR')]), -}); - -/** - * Enum for different types of models. - */ -export enum ModelType { - TEXTGEN = 'textgen', - EMBEDDING = 'embedding', -} - /** * Enum for different types of ECS container image sources. */ @@ -68,31 +42,12 @@ export enum EcsSourceType { TARBALL = 'tarball', } -/** - * Details and configurations of a registered model. - * - * @property {string} provider - Model provider, of the form .. - * @property {string} modelName - The unique name that identifies the model. - * @property {string} modelId - The unique user-provided name for the model. - * @property {ModelType} modelType - Specifies the type of model (e.g., 'textgen', 'embedding'). - * @property {string} endpointUrl - The URL endpoint where the model can be accessed or invoked. 
- * @property {boolean} streaming - Indicates whether the model supports streaming capabilities. - */ -export type RegisteredModel = { - provider: string; - modelId: string; - modelName: string; - modelType: ModelType; - endpointUrl: string; - streaming?: boolean; -}; - /** * Custom security groups for application. * - * @property {ec2.SecurityGroup} ecsModelAlbSg - ECS model application load balancer security group. - * @property {ec2.SecurityGroup} restApiAlbSg - REST API application load balancer security group. - * @property {ec2.SecurityGroup} lambdaSecurityGroup - Lambda security group. + * @property {ec2.SecurityGroup} ecsModelAlbSg - .describe('ECS model application load balancer security group.') + * @property {ec2.SecurityGroup} restApiAlbSg - .describe('REST API application load balancer security group.') + * @property {ec2.SecurityGroup} lambdaSecurityGroup - .describe('Lambda security group.') */ export type SecurityGroups = { ecsModelAlbSg: ec2.SecurityGroup; @@ -100,22 +55,13 @@ export type SecurityGroups = { lambdaSecurityGroup: ec2.SecurityGroup; }; -/** - * Metadata for a specific EC2 instance type. - * - * @property {number} memory - Memory in megabytes (MB). - * @property {number} gpuCount - Number of GPUs. - * @property {string} nvmePath - Path to NVMe drive to mount. - * @property {number} maxThroughput - Maximum network throughput in gigabits per second (Gbps). - * @property {number} vCpus - Number of virtual CPUs (vCPUs). 
- */ const Ec2TypeSchema = z.object({ - memory: z.number(), - gpuCount: z.number().min(0), - nvmePath: z.string().optional().default(''), - maxThroughput: z.number(), - vCpus: z.number(), -}); + memory: z.number().describe('Memory in megabytes (MB)'), + gpuCount: z.number().min(0).describe('Number of GPUs'), + nvmePath: z.string().default('').describe('Path to NVMe drive to mount'), + maxThroughput: z.number().describe('Maximum network throughput in gigabits per second (Gbps)'), + vCpus: z.number().describe('Number of virtual CPUs (vCPUs)'), +}).describe('Metadata for a specific EC2 instance type.'); type Ec2Type = z.infer; @@ -276,12 +222,12 @@ export class Ec2Metadata { }; /** - * Getter method to access EC2 metadata. Retrieves the metadata for a specific EC2 instance type. - * - * @param {string} key - The key representing the EC2 instance type (e.g., 'g4dn.xlarge'). - * @throws {Error} Throws an error if no metadata is found for the specified EC2 instance type. - * @returns {Ec2Type} The metadata for the specified EC2 instance type. - */ + * Getter method to access EC2 metadata. Retrieves the metadata for a specific EC2 instance type. + * + * @param {string} key - .describe('The key representing the EC2 instance type (e.g., 'g4dn.xlarge').') + * @throws {Error} Throws an error if no metadata is found for the specified EC2 instance type. + * @returns {Ec2Type} The metadata for the specified EC2 instance type. + */ static get (key: string): Ec2Type { const instance = this.instances[key]; if (!instance) { @@ -291,10 +237,10 @@ export class Ec2Metadata { } /** - * Get EC2 instances defined with metadata. - * - * @returns {string[]} Array of EC2 instances. - */ + * Get EC2 instances defined with metadata. + * + * @returns {string[]} Array of EC2 instances. 
+ */ static getValidInstanceKeys (): string[] { return Object.keys(this.instances); } @@ -302,68 +248,43 @@ export class Ec2Metadata { const VALID_INSTANCE_KEYS = Ec2Metadata.getValidInstanceKeys() as [string, ...string[]]; -/** - * Configuration for container health checks. - * - * @property {string[]} [command=['CMD-SHELL', 'exit 0']] - The command to run for health checks. - * @property {number} [interval=10] - The time interval between health checks, in seconds. - * @property {number} [startPeriod=30] - The time to wait before starting the first health check, in seconds. - * @property {number} [timeout=5] - The maximum time allowed for each health check to complete, in seconds. - * @property {number} [retries=2] - The number of times to retry a failed health check before considering the container - * as unhealthy. - */ const ContainerHealthCheckConfigSchema = z.object({ - command: z.array(z.string()).default(['CMD-SHELL', 'exit 0']), - interval: z.number().default(10), - startPeriod: z.number().default(30), - timeout: z.number().default(5), - retries: z.number().default(2), -}); + command: z.array(z.string()).default(['CMD-SHELL', 'exit 0']).describe('The command to run for health checks'), + interval: z.number().default(10).describe('The time interval between health checks, in seconds.'), + startPeriod: z.number().default(30).describe('The time to wait before starting the first health check, in seconds.'), + timeout: z.number().default(5).describe('The maximum time allowed for each health check to complete, in seconds'), + retries: z.number().default(2).describe('The number of times to retry a failed health check before considering the container as unhealthy.'), +}) + .describe('Configuration for container health checks'); -/** - * Container image that will use tarball on disk - */ const ImageTarballAsset = z.object({ path: z.string(), type: z.literal(EcsSourceType.TARBALL), -}); +}) + .describe('Container image that will use tarball on disk'); -/** - * 
Container image that will be built based on Dockerfile and assets at the supplied path - */ const ImageSourceAsset = z.object({ baseImage: z.string(), path: z.string(), type: z.literal(EcsSourceType.ASSET), -}); +}) + .describe('Container image that will be built based on Dockerfile and assets at the supplied path'); -/** - * Container image that will be pulled from the specified ECR repository - */ const ImageECRAsset = z.object({ repositoryArn: z.string(), tag: z.string().optional(), type: z.literal(EcsSourceType.ECR), -}); +}) + .describe('Container image that will be pulled from the specified ECR repository'); -/** - * Container image that will be pulled from the specified public registry - */ const ImageRegistryAsset = z.object({ registry: z.string(), type: z.literal(EcsSourceType.REGISTRY), -}); +}) + .describe('Container image that will be pulled from the specified public registry'); -/** - * Configuration for a container. - * - * @property {string} baseImage - Base image for the container. - * @property {Record} [environment={}] - Environment variables for the container. - * @property {ContainerHealthCheckConfig} [healthCheckConfig={}] - Health check configuration for the container. - * @property {number} [sharedMemorySize=0] - The value for the size of the /dev/shm volume. 
- */ const ContainerConfigSchema = z.object({ - image: z.union([ImageTarballAsset, ImageSourceAsset, ImageECRAsset, ImageRegistryAsset]), + image: z.union([ImageTarballAsset, ImageSourceAsset, ImageECRAsset, ImageRegistryAsset]).describe('Base image for the container.'), environment: z .record(z.any()) .transform((obj) => { @@ -375,108 +296,62 @@ const ContainerConfigSchema = z.object({ {} as Record, ); }) - .default({}), - sharedMemorySize: z.number().min(0).optional().default(0), + .default({}) + .describe('Environment variables for the container.'), + sharedMemorySize: z.number().min(0).default(0).describe('The value for the size of the /dev/shm volume.'), healthCheckConfig: ContainerHealthCheckConfigSchema.default({}), -}); +}).describe('Configuration for the container.'); -/** - * Configuration schema for health checks in load balancer settings. - * - * @property {string} path - Path for the health check. - * @property {number} [interval=30] - Interval in seconds between health checks. - * @property {number} [timeout=10] - Timeout in seconds for each health check. - * @property {number} [healthyThresholdCount=2] - Number of consecutive successful health checks required to consider - * the target healthy. - * @property {number} [unhealthyThresholdCount=2] - Number of consecutive failed health checks required to consider the - * target unhealthy. 
- */ const HealthCheckConfigSchema = z.object({ - path: z.string(), - interval: z.number().default(30), - timeout: z.number().default(10), - healthyThresholdCount: z.number().default(2), - unhealthyThresholdCount: z.number().default(2), -}); + path: z.string().describe('Path for the health check.'), + interval: z.number().default(30).describe('Interval in seconds between health checks.'), + timeout: z.number().default(10).describe('Timeout in seconds for each health check.'), + healthyThresholdCount: z.number().default(2).describe('Number of consecutive successful health checks required to consider the target healthy.'), + unhealthyThresholdCount: z.number().default(2).describe('Number of consecutive failed health checks required to consider the target unhealthy.'), +}) + .describe('Health check configuration for the load balancer.'); -/** - * Configuration schema for the load balancer. - * - * @property {string} [sslCertIamArn=null] - SSL certificate IAM ARN for load balancer. - * @property {HealthCheckConfig} healthCheckConfig - Health check configuration for the load balancer. - * @property {string} domainName - Domain name to use instead of the load balancer's default DNS name. - */ const LoadBalancerConfigSchema = z.object({ - sslCertIamArn: z.string().optional().nullable().default(null), + sslCertIamArn: z.string().nullish().default(null).describe('SSL certificate IAM ARN for load balancer.'), healthCheckConfig: HealthCheckConfigSchema, - domainName: z.string().optional().nullable().default(null), -}); + domainName: z.string().nullish().default(null).describe('Domain name to use instead of the load balancer\'s default DNS name.'), +}) + .describe('Configuration for load balancer settings.'); -/** - * Configuration schema for ECS auto scaling metrics. - * - * @property {string} AlbMetricName - Name of the ALB metric. - * @property {number} targetValue - Target value for the metric. 
- * @property {number} [duration=60] - Duration in seconds for metric evaluation. - * @property {number} [estimatedInstanceWarmup=180] - Estimated warm-up time in seconds until a newly launched instance - * can send metrics to CloudWatch. - * - */ const MetricConfigSchema = z.object({ - AlbMetricName: z.string(), - targetValue: z.number(), - duration: z.number().default(60), - estimatedInstanceWarmup: z.number().min(0).default(180), -}); + AlbMetricName: z.string().describe('Name of the ALB metric.'), + targetValue: z.number().describe('Target value for the metric.'), + duration: z.number().default(60).describe('Duration in seconds for metric evaluation.'), + estimatedInstanceWarmup: z.number().min(0).default(180).describe('Estimated warm-up time in seconds until a newly launched instance can send metrics to CloudWatch.'), +}) + .describe('Metric configuration for ECS auto scaling.'); -/** - * Configuration schema for ECS auto scaling settings. -* -* @property {number} [minCapacity=1] - Minimum capacity for auto scaling. Must be at least 1. -* @property {number} [maxCapacity=2] - Maximum capacity for auto scaling. Must be at least 1. -* @property {number} [cooldown=420] - Cool down period in seconds between scaling activities. -* @property {number} [defaultInstanceWarmup=180] - Default warm-up time in seconds until a newly launched instance can - send metrics to CloudWatch. -* @property {MetricConfig} metricConfig - Metric configuration for auto scaling. -*/ const AutoScalingConfigSchema = z.object({ - minCapacity: z.number().min(1).default(1), - maxCapacity: z.number().min(1).default(2), - defaultInstanceWarmup: z.number().default(180), - cooldown: z.number().min(1).default(420), + blockDeviceVolumeSize: z.number().min(30).default(30), + minCapacity: z.number().min(1).default(1).describe('Minimum capacity for auto scaling. Must be at least 1.'), + maxCapacity: z.number().min(1).default(2).describe('Maximum capacity for auto scaling. 
Must be at least 1.'), + defaultInstanceWarmup: z.number().default(180).describe('Default warm-up time in seconds until a newly launched instance can'), + cooldown: z.number().min(1).default(420).describe('Cool down period in seconds between scaling activities.'), metricConfig: MetricConfigSchema, -}); +}) + .describe('Configuration for auto scaling settings.'); -/** - * Configuration schema for an ECS model. - * - * @property {AmiHardwareType} amiHardwareType - Name of the model. - * @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings. - * @property {Record} buildArgs - Optional build args to be applied when creating the - * task container if containerConfig.image.type is ASSET - * @property {ContainerConfig} containerConfig - Configuration for the container. - * @property {number} [containerMemoryBuffer=2048] - This is the amount of memory to buffer (or subtract off) - * from the total instance memory, if we don't include this, - * the container can have a hard time finding available RAM - * resources to start and the tasks will fail deployment - * @property {Record} environment - Environment variables set on the task container - * @property {identifier} modelType - Unique identifier for the cluster which will be used when naming resources - * @property {string} instanceType - EC2 instance type for running the model. - * @property {boolean} [internetFacing=false] - Whether or not the cluster will be configured as internet facing - * @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings. 
- */ const EcsBaseConfigSchema = z.object({ - amiHardwareType: z.nativeEnum(AmiHardwareType), - autoScalingConfig: AutoScalingConfigSchema, - buildArgs: z.record(z.string()).optional(), + amiHardwareType: z.nativeEnum(AmiHardwareType).describe('Name of the model.'), + autoScalingConfig: AutoScalingConfigSchema.describe('Configuration for auto scaling settings.'), + buildArgs: z.record(z.string()).optional() + .describe('Optional build args to be applied when creating the task container if containerConfig.image.type is ASSET'), containerConfig: ContainerConfigSchema, - containerMemoryBuffer: z.number().default(1024 * 2), - environment: z.record(z.string()), + containerMemoryBuffer: z.number().default(1024 * 2) + .describe('This is the amount of memory to buffer (or subtract off) from the total instance memory, ' + + 'if we don\'t include this, the container can have a hard time finding available RAM resources to start and the tasks will fail deployment'), + environment: z.record(z.string()).describe('Environment variables set on the task container'), identifier: z.string(), - instanceType: z.enum(VALID_INSTANCE_KEYS), - internetFacing: z.boolean().default(false), + instanceType: z.enum(VALID_INSTANCE_KEYS).describe('EC2 instance type for running the model.'), + internetFacing: z.boolean().default(false).describe('Whether or not the cluster will be configured as internet facing'), loadBalancerConfig: LoadBalancerConfigSchema, -}); +}) + .describe('Configuration schema for an ECS model'); /** * Type representing configuration for an ECS model. @@ -488,23 +363,18 @@ type EcsBaseConfig = z.infer; */ export type ECSConfig = EcsBaseConfig; -/** - * Configuration schema for an ECS model. - * - * @property {string} modelName - Name of the model. - * @property {string} baseImage - Base image for the container. - * @property {string} inferenceContainer - Prebuilt inference container for serving model. 
- */ const EcsModelConfigSchema = z .object({ - modelName: z.string(), - baseImage: z.string(), + modelName: z.string().describe('Name of the model.'), + baseImage: z.string().describe('Base image for the container.'), inferenceContainer: z .union([z.literal('tgi'), z.literal('tei'), z.literal('instructor'), z.literal('vllm')]) .refine((data) => { return !data.includes('.'); // string cannot contain a period }) - }); + .describe('Prebuilt inference container for serving model.'), + }) + .describe('Configuration schema for an ECS model.'); /** * Type representing configuration for an ECS model. @@ -516,15 +386,6 @@ type EcsModelConfig = z.infer; */ export type ModelConfig = EcsModelConfig; -/** - * Configuration schema for authorization. - * - * @property {string} [authority=null] - URL of OIDC authority. - * @property {string} [clientId=null] - Client ID for OIDC IDP . - * @property {string} [adminGroup=null] - Name of the admin group. - * @property {string} [jwtGroupsProperty=null] - Name of the JWT groups property. - * @property {string[]} [additionalScopes=null] - Additional JWT scopes to request. 
- */ const AuthConfigSchema = z.object({ authority: z.string().transform((value) => { if (value.endsWith('/')) { @@ -532,54 +393,32 @@ const AuthConfigSchema = z.object({ } else { return value; } - }), - clientId: z.string(), - adminGroup: z.string().optional().default(''), - jwtGroupsProperty: z.string().optional().default(''), - additionalScopes: z.array(z.string()).optional().default([]), -}); + }) + .describe('URL of OIDC authority.'), + clientId: z.string().describe('Client ID for OIDC IDP .'), + adminGroup: z.string().default('').describe('Name of the admin group.'), + jwtGroupsProperty: z.string().default('').describe('Name of the JWT groups property.'), + additionalScopes: z.array(z.string()).default([]).describe('Additional JWT scopes to request.'), +}).describe('Configuration schema for authorization.'); -/** - * Configuration schema for RDS Instances needed for LiteLLM scaling or PGVector RAG operations. - * - * The optional fields can be omitted to create a new database instance, otherwise fill in all fields to use - * an existing database instance. - * - * @property {string} username - Database username. - * @property {string} passwordSecretId - SecretsManager Secret ID that stores an existing database password. - * @property {string} dbHost - Database hostname for existing database instance. - * @property {string} dbName - Database name for existing database instance. - * @property {number} dbPort - Port to open on the database instance. 
- */ const RdsInstanceConfig = z.object({ - username: z.string().optional().default('postgres'), - passwordSecretId: z.string().optional(), - dbHost: z.string().optional(), - dbName: z.string().optional().default('postgres'), - dbPort: z.number().optional().default(5432), -}); + username: z.string().default('postgres').describe('Database username.'), + passwordSecretId: z.string().optional().describe('SecretsManager Secret ID that stores an existing database password.'), + dbHost: z.string().optional().describe('Database hostname for existing database instance.'), + dbName: z.string().default('postgres').describe('Database name for existing database instance.'), + dbPort: z.number().default(5432).describe('Port to open on the database instance.'), +}).describe('Configuration schema for RDS Instances needed for LiteLLM scaling or PGVector RAG operations.\n \n ' + + 'The optional fields can be omitted to create a new database instance, otherwise fill in all fields to use an existing database instance.'); -/** - * Configuration schema for REST API. - * - * @property {string} instanceType - EC2 instance type. - * @property {ContainerConfig} containerConfig - Configuration for the container. - * @property {AutoScalingConfigSchema} autoScalingConfig - Configuration for auto scaling settings. - * @property {LoadBalancerConfig} loadBalancerConfig - Configuration for load balancer settings. - * @property {boolean} [internetFacing=true] - Whether or not the REST API ALB will be configured as internet facing. - * @property {RdsInstanceConfig} rdsConfig - Configuration for LiteLLM scaling database. 
- */ const FastApiContainerConfigSchema = z.object({ - apiVersion: z.literal('v2'), - instanceType: z.enum(VALID_INSTANCE_KEYS), - containerConfig: ContainerConfigSchema, - autoScalingConfig: AutoScalingConfigSchema, - loadBalancerConfig: LoadBalancerConfigSchema, - internetFacing: z.boolean().default(true), - rdsConfig: RdsInstanceConfig.optional() + internetFacing: z.boolean().default(true).describe('Whether the REST API ALB will be configured as internet facing.'), + domainName: z.string().nullish().default(null), + sslCertIamArn: z.string().nullish().default(null).describe('ARN of the self-signed cert to be used throughout the system'), + rdsConfig: RdsInstanceConfig .default({ dbName: 'postgres', username: 'postgres', + dbPort: 5432, }) .refine( (config) => { @@ -587,11 +426,11 @@ const FastApiContainerConfigSchema = z.object({ }, { message: - 'We do not allow using an existing DB for LiteLLM because of its requirement in internal model management ' + - 'APIs. Please do not define the dbHost or passwordSecretId fields for the FastAPI container DB config.', + 'We do not allow using an existing DB for LiteLLM because of its requirement in internal model management ' + + 'APIs. Please do not define the dbHost or passwordSecretId fields for the FastAPI container DB config.', }, ), -}); +}).describe('Configuration schema for REST API.'); /** * Enum for different types of RAG repositories available @@ -614,9 +453,6 @@ const OpenSearchExistingClusterConfig = z.object({ endpoint: z.string(), }); -/** - * Configuration schema for RAG repository. Defines settings for OpenSearch. 
- */ const RagRepositoryConfigSchema = z .object({ repositoryId: z.string(), @@ -625,33 +461,22 @@ const RagRepositoryConfigSchema = z rdsConfig: RdsInstanceConfig.optional(), }) .refine((input) => { - if ( - (input.type === RagRepositoryType.OPENSEARCH && input.opensearchConfig === undefined) || - (input.type === RagRepositoryType.PGVECTOR && input.rdsConfig === undefined) - ) { - return false; - } - return true; - }); + return !((input.type === RagRepositoryType.OPENSEARCH && input.opensearchConfig === undefined) || + (input.type === RagRepositoryType.PGVECTOR && input.rdsConfig === undefined)); + }) + .describe('Configuration schema for RAG repository. Defines settings for OpenSearch.'); -/** - * Configuration schema for RAG file processing. Determines the chunk size and chunk overlap when processing documents. - */ const RagFileProcessingConfigSchema = z.object({ chunkSize: z.number().min(100).max(10000), chunkOverlap: z.number().min(0), -}); +}) + .describe('Configuration schema for RAG file processing. Determines the chunk size and chunk overlap when processing documents.'); -/** - * Configuration schema for pypi. - * - * @property {string} [indexUrl=''] - URL for the pypi index. - * @property {string} [trustedHost=''] - Trusted host for pypi. 
- */ const PypiConfigSchema = z.object({ - indexUrl: z.string().optional().default(''), - trustedHost: z.string().optional().default(''), -}); + indexUrl: z.string().default('').describe('URL for the pypi index.'), + trustedHost: z.string().default('').describe('Trusted host for pypi.'), +}) + .describe('Configuration schema for pypi'); /** * Enum for different types of stack synthesizers @@ -662,208 +487,83 @@ export enum stackSynthesizerType { LegacyStackSynthesizer = 'LegacyStackSynthesizer', } -/** - * Configuration schema for API Gateway Endpoint - * - * @property {string} domainName - Custom domain name for API Gateway Endpoint - */ const ApiGatewayConfigSchema = z .object({ - domainName: z.string().optional().nullable().default(null), + domainName: z.string().nullish().default(null).describe('Custom domain name for API Gateway Endpoint'), }) - .optional(); + .optional() + .describe('Configuration schema for API Gateway Endpoint'); -/** - * Configuration for models inside the LiteLLM Config - * See https://litellm.vercel.app/docs/proxy/configs#all-settings for more details. - * - * The `lisa_params` are custom for the LISA installation to add model metadata to allow the models to be referenced - * correctly within the Chat UI. LiteLLM will ignore these parameters as it is not looking for them, and it will not - * fail to initialize as a result of them existing. 
- */ -const LiteLLMModel = z.object({ - model_name: z.string(), - litellm_params: z.object({ - model: z.string(), - api_base: z.string().optional(), - api_key: z.string().optional(), - aws_region_name: z.string().optional(), - }), - lisa_params: z - .object({ - streaming: z.boolean().nullable().default(null), - model_type: z.nativeEnum(ModelType), - }) - .refine( - (data) => { - // 'textgen' type must have boolean streaming, 'embedding' type must have null streaming - const isValidForTextgen = data.model_type === 'textgen' && typeof data.streaming === 'boolean'; - const isValidForEmbedding = data.model_type === 'embedding' && data.streaming === null; - - return isValidForTextgen || isValidForEmbedding; - }, - { - message: `For 'textgen' models, 'streaming' must be true or false. - For 'embedding' models, 'streaming' must not be set.`, - path: ['streaming'], - }, - ), - model_info: z - .object({ - id: z.string().optional(), - mode: z.string().optional(), - input_cost_per_token: z.number().optional(), - output_cost_per_token: z.number().optional(), - max_tokens: z.number().optional(), - base_model: z.string().optional(), - }) - .optional(), -}); - -/** - * Core LiteLLM configuration. - * See https://litellm.vercel.app/docs/proxy/configs#all-settings for more details about each field. - */ const LiteLLMConfig = z.object({ - environment_variables: z.map(z.string(), z.string()).optional(), - model_list: z - .array(LiteLLMModel) - .optional() - .nullable() - .default([]) - .transform((value) => value ?? 
[]), - litellm_settings: z.object({ - // ALL (https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py) - telemetry: z.boolean().default(false).optional(), - drop_params: z.boolean().default(true).optional(), - }), - general_settings: z - .object({ - completion_model: z.string().optional(), - disable_spend_logs: z.boolean().optional(), // turn off writing each transaction to the db - disable_master_key_return: z.boolean().optional(), // turn off returning master key on UI - disable_reset_budget: z.boolean().optional(), // turn off reset budget scheduled task - enable_jwt_auth: z.boolean().optional(), // allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' - enforce_user_param: z.boolean().optional(), // requires all openai endpoint requests to have a 'user' param - allowed_routes: z.array(z.string()).optional(), // list of allowed proxy API routes a user can access. (JWT only) - key_management_system: z.string().optional(), // either google_kms or azure_kms - master_key: z.string().refine( - (key) => key.startsWith('sk-'), // key needed for model management actions - 'Key string must be defined for model management operations, and it must start with "sk-".' + - 'This can be any string, and a random UUID is recommended. 
Example: sk-f132c7cc-059c-481b-b5ca-a42e191672aa', - ), - database_url: z.string().optional(), - database_connection_pool_limit: z.number().optional(), // default 100 - database_connection_timeout: z.number().optional(), // default 60s - database_type: z.string().optional(), - database_args: z - .object({ - billing_mode: z.string().optional(), - read_capacity_units: z.number().optional(), - write_capacity_units: z.number().optional(), - ssl_verify: z.boolean().optional(), - region_name: z.string().optional(), - user_table_name: z.string().optional(), - key_table_name: z.string().optional(), - config_table_name: z.string().optional(), - spend_table_name: z.string().optional(), - }) - .optional(), - otel: z.boolean().optional(), - custom_auth: z.string().optional(), - max_parallel_requests: z.number().optional(), - infer_model_from_keys: z.boolean().optional(), - background_health_checks: z.boolean().optional(), - health_check_interval: z.number().optional(), - alerting: z.array(z.string()).optional(), - alerting_threshold: z.number().optional(), - }), -}); + db_key: z.string().refine( + (key) => key.startsWith('sk-'), // key needed for model management actions + 'Key string must be defined for model management operations, and it must start with "sk-".' + + 'This can be any string, and a random UUID is recommended. Example: sk-f132c7cc-059c-481b-b5ca-a42e191672aa', + ), +}) + .describe('Core LiteLLM configuration - see https://litellm.vercel.app/docs/proxy/configs#all-settings for more details about each field.'); -/** - * Raw application configuration schema. - * - * @property {string} [appName='lisa'] - Name of the application. - * @property {string} [profile=null] - AWS CLI profile for deployment. - * @property {string} deploymentName - Name of the deployment. - * @property {string} accountNumber - AWS account number for deployment. Must be 12 digits. - * @property {string} region - AWS region for deployment. 
- * @property {string} deploymentStage - Deployment stage for the application. - * @property {string} removalPolicy - Removal policy for resources (destroy or retain). - * @property {boolean} [runCdkNag=false] - Whether to run CDK Nag checks. - * @property {lambdaConfigSchema} lambdaConfig - Lambda configuration. - * @property {string} [lambdaSourcePath='./lambda'] - Path to Lambda source code dir. - * @property {string} s3BucketModels - S3 bucket for models. - * @property {string} mountS3DebUrl - URL for S3-mounted Debian package. - * @property {string[]} [accountNumbersEcr=null] - List of AWS account numbers for ECR repositories. - * @property {boolean} [deployRag=false] - Whether to deploy RAG stacks. - * @property {boolean} [deployChat=true] - Whether to deploy chat stacks. - * @property {boolean} [deployUi=true] - Whether to deploy UI stacks. - * @property {string} logLevel - Log level for application. - * @property {AuthConfigSchema} authConfig - Authorization configuration. - * @property {FastApiContainerConfigSchema} restApiConfig - REST API configuration. - * @property {RagRepositoryConfigSchema} ragRepositoryConfig - Rag Repository configuration. - * @property {RagFileProcessingConfigSchema} ragFileProcessingConfig - Rag file processing configuration. - * @property {EcsModelConfigSchema[]} ecsModels - Array of ECS model configurations. - * @property {ApiGatewayConfigSchema} apiGatewayConfig - API Gateway Endpoint configuration. - * @property {string} [nvmeHostMountPath='/nvme'] - Host path for NVMe drives. - * @property {string} [nvmeContainerMountPath='/nvme'] - Container path for NVMe drives. - * @property {Array<{ Key: string, Value: string }>} [tags=null] - Array of key-value pairs for tagging. - * @property {string} [deploymentPrefix=null] - Prefix for deployment resources. - * @property {string} [webAppAssetsPath=null] - Optional path to precompiled webapp assets. If not - * specified the web application will be built at deploy - * time. 
- */ const RawConfigSchema = z .object({ - appName: z.string().default('lisa'), + appName: z.string().default('lisa').describe('Name of the application.'), profile: z .string() - .optional() - .nullable() - .transform((value) => value ?? ''), - deploymentName: z.string(), + .nullish() + .transform((value) => value ?? '') + .describe('AWS CLI profile for deployment.'), + deploymentName: z.string().default('prod').describe('Name of the deployment.'), accountNumber: z .number() .or(z.string()) .transform((value) => value.toString()) .refine((value) => value.length === 12, { message: 'AWS account number should be 12 digits. If your account ID starts with 0, then please surround the ID with quotation marks.', - }), - region: z.string(), - vpcId: z.string().optional(), - deploymentStage: z.string(), - removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]).transform((value) => REMOVAL_POLICIES[value]), - runCdkNag: z.boolean().default(false), - s3BucketModels: z.string(), - mountS3DebUrl: z.string().optional(), + }) + .describe('AWS account number for deployment. Must be 12 digits.'), + region: z.string().describe('AWS region for deployment.'), + restApiConfig: FastApiContainerConfigSchema, + vpcId: z.string().optional().describe('VPC ID for the application. (e.g. vpc-0123456789abcdef)'), + subnets: z.array(z.object({ + subnetId: z.string().startsWith('subnet-'), + ipv4CidrBlock: z.string() + })).optional().describe('Array of subnet objects for the application. These contain a subnetId(e.g. 
[subnet-fedcba9876543210] and ipv4CidrBlock'), + deploymentStage: z.string().default('prod').describe('Deployment stage for the application.'), + removalPolicy: z.union([z.literal('destroy'), z.literal('retain')]) + .transform((value) => REMOVAL_POLICIES[value]) + .default('destroy') + .describe('Removal policy for resources (destroy or retain).'), + runCdkNag: z.boolean().default(false).describe('Whether to run CDK Nag checks.'), + privateEndpoints: z.boolean().default(false).describe('Whether to use privateEndpoints for REST API.'), + s3BucketModels: z.string().describe('S3 bucket for models.'), + mountS3DebUrl: z.string().describe('URL for S3-mounted Debian package.'), accountNumbersEcr: z .array(z.union([z.number(), z.string()])) .transform((arr) => arr.map(String)) .refine((value) => value.every((num) => num.length === 12), { message: 'AWS account number should be 12 digits. If your account ID starts with 0, then please surround the ID with quotation marks.', }) - .optional(), - deployRag: z.boolean().optional().default(false), - deployChat: z.boolean().optional().default(true), - deployUi: z.boolean().optional().default(true), - logLevel: z.union([z.literal('DEBUG'), z.literal('INFO'), z.literal('WARNING'), z.literal('ERROR')]), - lambdaConfig: lambdaConfigSchema, - lambdaSourcePath: z.string().optional().default('./lambda'), - authConfig: AuthConfigSchema.optional(), - pypiConfig: PypiConfigSchema.optional().default({ + .optional() + .describe('List of AWS account numbers for ECR repositories.'), + deployRag: z.boolean().default(true).describe('Whether to deploy RAG stacks.'), + deployChat: z.boolean().default(true).describe('Whether to deploy chat stacks.'), + deployDocs: z.boolean().default(true).describe('Whether to deploy docs stacks.'), + deployUi: z.boolean().default(true).describe('Whether to deploy UI stacks.'), + logLevel: z.union([z.literal('DEBUG'), z.literal('INFO'), z.literal('WARNING'), z.literal('ERROR')]) + .default('DEBUG') + .describe('Log 
level for application.'), + authConfig: AuthConfigSchema.optional().describe('Authorization configuration.'), + pypiConfig: PypiConfigSchema.default({ indexUrl: '', trustedHost: '', - }), - condaUrl: z.string().optional().default(''), - certificateAuthorityBundle: z.string().optional().default(''), - ragRepositories: z.array(RagRepositoryConfigSchema).default([]), - ragFileProcessingConfig: RagFileProcessingConfigSchema.optional(), - restApiConfig: FastApiContainerConfigSchema, - ecsModels: z.array(EcsModelConfigSchema).optional(), - apiGatewayConfig: ApiGatewayConfigSchema.optional(), - nvmeHostMountPath: z.string().default('/nvme'), - nvmeContainerMountPath: z.string().default('/nvme'), + }).describe('Pypi configuration.'), + condaUrl: z.string().default('').describe('Conda URL configuration'), + certificateAuthorityBundle: z.string().default('').describe('Certificate Authority Bundle file'), + ragRepositories: z.array(RagRepositoryConfigSchema).default([]).describe('Rag Repository configuration.'), + ragFileProcessingConfig: RagFileProcessingConfigSchema.optional().describe('Rag file processing configuration.'), + ecsModels: z.array(EcsModelConfigSchema).optional().describe('Array of ECS model configurations.'), + apiGatewayConfig: ApiGatewayConfigSchema, + nvmeHostMountPath: z.string().default('/nvme').describe('Host path for NVMe drives.'), + nvmeContainerMountPath: z.string().default('/nvme').describe('Container path for NVMe drives.'), tags: z .array( z.object({ @@ -871,25 +571,20 @@ const RawConfigSchema = z Value: z.string(), }), ) - .optional(), - deploymentPrefix: z.string().optional(), - webAppAssetsPath: z.string().optional(), + .optional() + .describe('Array of key-value pairs for tagging.'), + deploymentPrefix: z.string().optional().describe('Prefix for deployment resources.'), + webAppAssetsPath: z.string().optional().describe('Optional path to precompiled webapp assets. 
If not specified the web application will be built at deploy time.'), lambdaLayerAssets: z .object({ - authorizerLayerPath: z.string().optional(), - commonLayerPath: z.string().optional(), - fastapiLayerPath: z.string().optional(), - ragLayerPath: z.string().optional(), - sdkLayerPath: z.string().optional(), + authorizerLayerPath: z.string().optional().describe('Lambda Authorizer code path'), + commonLayerPath: z.string().optional().describe('Lambda common layer code path'), + fastapiLayerPath: z.string().optional().describe('Lambda API code path'), + ragLayerPath: z.string().optional().describe('Lambda RAG layer code path'), + sdkLayerPath: z.string().optional().describe('Lambda SDK layer code path'), }) - .optional(), - systemBanner: z - .object({ - text: z.string(), - backgroundColor: z.string(), - fontColor: z.string(), - }) - .optional(), + .optional() + .describe('Configuration for local Lambda layer code'), permissionsBoundaryAspect: z .object({ permissionsBoundaryPolicyName: z.string(), @@ -897,8 +592,9 @@ const RawConfigSchema = z policyPrefix: z.string().max(20).optional(), instanceProfilePrefix: z.string().optional(), }) - .optional(), - stackSynthesizer: z.nativeEnum(stackSynthesizerType).optional(), + .optional() + .describe('Aspect CDK injector for permissions. Ref: https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_iam.PermissionsBoundary.html'), + stackSynthesizer: z.nativeEnum(stackSynthesizerType).optional().describe('Set the stack synthesize type. Ref: https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.StackSynthesizer.html'), litellmConfig: LiteLLMConfig, }) .refine((config) => (config.pypiConfig.indexUrl && config.region.includes('iso')) || !config.region.includes('iso'), { @@ -912,25 +608,34 @@ const RawConfigSchema = z message: 'Chat stack is needed for UI stack. 
You must set deployChat to true if deployUi is true.', }, ) + .refine( + (config) => { + return !(config.deployRag && !config.deployUi); + }, + { + message: 'UI Stack is needed for Rag stack. You must set deployUI to true if deployRag is true.', + }, + ) .refine( (config) => { return ( - !(config.deployChat || config.deployRag || config.deployUi || config.restApiConfig.internetFacing) || - config.authConfig + !(config.deployChat || config.deployRag || config.deployUi) || + config.authConfig ); }, { message: - 'An auth config must be provided when deploying the chat, RAG, or UI stacks or when deploying an internet ' + - 'facing ALB. Check that `deployChat`, `deployRag`, `deployUi`, and `restApiConfig.internetFacing` are all ' + - 'false or that an `authConfig` is provided.', + 'An auth config must be provided when deploying the chat, RAG, or UI stacks or when deploying an internet ' + + 'facing ALB. Check that `deployChat`, `deployRag`, `deployUi`, and `restApiConfig.internetFacing` are all ' + + 'false or that an `authConfig` is provided.', }, - ); + ) + .describe('Raw application configuration schema.'); /** * Apply transformations to the raw application configuration schema. * - * @param {Object} rawConfig - The raw application configuration. + * @param {Object} rawConfig - .describe('The raw application configuration.') * @returns {Object} The transformed application configuration. */ export const ConfigSchema = RawConfigSchema.transform((rawConfig) => { @@ -976,12 +681,10 @@ export const ConfigSchema = RawConfigSchema.transform((rawConfig) => { */ export type Config = z.infer; -export type FastApiContainerConfig = z.infer; - /** * Basic properties required for a stack definition in CDK. * - * @property {Config} config - The application configuration. 
+ * @property {Config} config - .describe('The application configuration.') */ export type BaseProps = { config: Config; diff --git a/lib/serve/ecs-model/textgen/tgi/src/entrypoint.sh b/lib/serve/ecs-model/textgen/tgi/src/entrypoint.sh index be6270fc..9486e10f 100644 --- a/lib/serve/ecs-model/textgen/tgi/src/entrypoint.sh +++ b/lib/serve/ecs-model/textgen/tgi/src/entrypoint.sh @@ -27,11 +27,29 @@ echo "Setting environment variables" export MAX_CONCURRENT_REQUESTS="${MAX_CONCURRENT_REQUESTS}" export MAX_INPUT_LENGTH="${MAX_INPUT_LENGTH}" export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS}" + +startArgs=() + if [[ -n "${QUANTIZE}" ]]; then export QUANTIZE="${QUANTIZE}" + startArgs+=('--quantize' "${QUANTIZE}") +fi +# Check if CUDA_VISIBLE_DEVICES is set, otherwise set it to use GPU 0 +if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then + export CUDA_VISIBLE_DEVICES="0" +fi +# Check if number of shards is set, otherwise set it to use 1 +if [[ -z "${NUM_SHARD}" ]]; then + export NUM_SHARD="${NUM_SHARD:-1}" fi echo "$(env)" +startArgs+=('--model-id' "${LOCAL_MODEL_PATH}") +startArgs+=('--port' '8080') +startArgs+=('--num-shard' "${NUM_SHARD}") +startArgs+=('--json-output') + # Start the webserver echo "Starting TGI" -text-generation-launcher --model-id $LOCAL_MODEL_PATH --port 8080 --json-output +CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} \ +text-generation-launcher "${startArgs[@]}" diff --git a/lib/serve/index.ts b/lib/serve/index.ts index e92dbcd0..f8f35def 100644 --- a/lib/serve/index.ts +++ b/lib/serve/index.ts @@ -56,7 +56,6 @@ export class LisaServeApplicationStack extends Stack { super(scope, id, props); const { config, vpc } = props; - const rdsConfig = config.restApiConfig.rdsConfig; let tokenTable; if (config.restApiConfig.internetFacing) { @@ -79,9 +78,8 @@ export class LisaServeApplicationStack extends Stack { config: config, resourcePath: path.join(HERE, 'rest-api'), securityGroup: vpc.securityGroups.restApiAlbSg, - taskConfig: config.restApiConfig, tokenTable: 
tokenTable, - vpc: vpc.vpc, + vpc: vpc, }); const managementKeySecret = new Secret(this, createCdkId([id, 'managementKeySecret']), { @@ -101,6 +99,11 @@ export class LisaServeApplicationStack extends Stack { // const rotateManagementKeyLambdaId = createCdkId([id, 'RotateManagementKeyLambda']) // const rotateManagementKeyLambda = new Function(this, rotateManagementKeyLambdaId, { + // deadLetterQueueEnabled: true, + // deadLetterQueue: new Queue(this, 'RotateManagementKeyLambdaDLQ', { + // queueName: 'RotateManagementKeyLambdaDLQ', + // enforceSSL: true, + // }), // functionName: rotateManagementKeyLambdaId, // runtime: config.lambdaConfig.pythonRuntime, // handler: 'management_key.rotate_management_key', @@ -112,6 +115,7 @@ export class LisaServeApplicationStack extends Stack { // }, // layers: [commonLambdaLayer], // vpc: props.vpc.vpc, + // reservedConcurrentExecutions: 5, // }); // managementKeySecret.grantRead(rotateManagementKeyLambda); @@ -142,15 +146,17 @@ export class LisaServeApplicationStack extends Stack { vpc: vpc.vpc, description: 'Security group for LiteLLM dynamic model management database.', }); - vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets).forEach((subnet) => { + + const subNets = config.subnets && config.vpcId ? vpc.subnetSelection?.subnets : vpc.vpc.isolatedSubnets.concat(vpc.vpc.privateSubnets); + subNets?.forEach((subnet) => { litellmDbSg.connections.allowFrom( - Peer.ipv4(subnet.ipv4CidrBlock), - Port.tcp(rdsConfig.dbPort), + Peer.ipv4(config.subnets ? 
config.subnets.filter((filteredSubnet) => filteredSubnet.subnetId === subnet.subnetId)?.[0]?.ipv4CidrBlock : subnet.ipv4CidrBlock), + Port.tcp(config.restApiConfig.rdsConfig.dbPort), 'Allow REST API private subnets to communicate with LiteLLM database', ); }); - const username = rdsConfig.username; + const username = config.restApiConfig.rdsConfig.username; const dbCreds = Credentials.fromGeneratedSecret(username); // DB is a Single AZ instance for cost + inability to make non-Aurora multi-AZ cluster in CDK @@ -159,6 +165,7 @@ export class LisaServeApplicationStack extends Stack { const litellmDb = new DatabaseInstance(this, 'LiteLLMScalingDB', { engine: DatabaseInstanceEngine.POSTGRES, vpc: vpc.vpc, + subnetGroup: vpc.subnetGroup, credentials: dbCreds, securityGroups: [litellmDbSg!], removalPolicy: config.removalPolicy, @@ -171,8 +178,8 @@ export class LisaServeApplicationStack extends Stack { username: username, passwordSecretId: litellmDbPasswordSecret.secretName, dbHost: litellmDb.dbInstanceEndpointAddress, - dbName: rdsConfig.dbName, - dbPort: rdsConfig.dbPort, + dbName: config.restApiConfig.rdsConfig.dbName, + dbPort: config.restApiConfig.rdsConfig.dbPort, }), }); litellmDbPasswordSecret.grantRead(restApi.taskRole); diff --git a/lib/serve/rest-api/src/requirements.txt b/lib/serve/rest-api/src/requirements.txt index d533279a..2931cbc8 100644 --- a/lib/serve/rest-api/src/requirements.txt +++ b/lib/serve/rest-api/src/requirements.txt @@ -7,7 +7,7 @@ cryptography==42.0.8 fastapi==0.111.1 fastapi_utils==0.7.0 gunicorn==22.0.0 -litellm[proxy]==1.43.4 +litellm[proxy]==1.50.4 loguru==0.7.2 pydantic==2.8.2 PyJWT==2.9.0 diff --git a/lib/serve/rest-api/src/utils/generate_litellm_config.py b/lib/serve/rest-api/src/utils/generate_litellm_config.py index 4d9f06a6..9e5bec09 100644 --- a/lib/serve/rest-api/src/utils/generate_litellm_config.py +++ b/lib/serve/rest-api/src/utils/generate_litellm_config.py @@ -49,7 +49,7 @@ def generate_config(filepath: str) -> None: } for 
model in registered_models ] - config_models = config_contents["model_list"] or [] # ensure config_models is a list and not None + config_models = [] # ensure config_models is a list and not None config_models.extend(litellm_model_params) config_contents["model_list"] = config_models config_contents["litellm_settings"] = { @@ -67,11 +67,13 @@ def generate_config(filepath: str) -> None: f"/{db_params['dbName']}" ) - general_settings = config_contents["general_settings"] - general_settings.update( + config_contents.update( { - "store_model_in_db": True, - "database_url": connection_str, + "general_settings": { + "store_model_in_db": True, + "database_url": connection_str, + "master_key": config_contents["db_key"], + } } ) diff --git a/lib/stages.ts b/lib/stages.ts index e92925cc..e1edff73 100644 --- a/lib/stages.ts +++ b/lib/stages.ts @@ -28,6 +28,7 @@ import { Tags, } from 'aws-cdk-lib'; import { Construct } from 'constructs'; +import { AwsSolutionsChecks, NIST80053R5Checks } from 'cdk-nag'; import { LisaChatApplicationStack } from './chat'; import { CoreStack, ARCHITECTURE } from './core'; @@ -41,6 +42,7 @@ import { LisaRagStack } from './rag'; import { BaseProps, stackSynthesizerType } from './schema'; import { LisaServeApplicationStack } from './serve'; import { UserInterfaceStack } from './user-interface'; +import { LisaDocsStack } from './docs'; type CustomLisaServeApplicationStageProps = {} & BaseProps; type LisaServeApplicationStageProps = CustomLisaServeApplicationStageProps & StageProps; @@ -100,7 +102,7 @@ export class LisaServeApplicationStage extends Stage { baseStackProps.synthesizer = new DefaultStackSynthesizer(); break; default: - throw Error('Unrecognized config value: "stackSyntehsizer"'); + throw Error('Unrecognized config value: "stackSynthesizer"'); } } @@ -142,7 +144,7 @@ export class LisaServeApplicationStage extends Stage { ...baseStackProps, stackName: createCdkId([config.deploymentName, config.appName, 'API']), description: `LISA-API: 
${config.deploymentName}-${config.deploymentStage}`, - vpc: networkingStack.vpc.vpc, + vpc: networkingStack.vpc, }); apiBaseStack.addDependency(coreStack); apiBaseStack.addDependency(serveStack); @@ -170,57 +172,68 @@ export class LisaServeApplicationStage extends Stage { apiDeploymentStack.addDependency(modelsApiDeploymentStack); stacks.push(modelsApiDeploymentStack); - const chatStack = new LisaChatApplicationStack(this, 'LisaChat', { - ...baseStackProps, - authorizer: apiBaseStack.authorizer, - stackName: createCdkId([config.deploymentName, config.appName, 'chat', config.deploymentStage]), - description: `LISA-chat: ${config.deploymentName}-${config.deploymentStage}`, - restApiId: apiBaseStack.restApiId, - rootResourceId: apiBaseStack.rootResourceId, - vpc: networkingStack.vpc.vpc, - }); - chatStack.addDependency(apiBaseStack); - chatStack.addDependency(coreStack); - apiDeploymentStack.addDependency(chatStack); - stacks.push(chatStack); - - const uiStack = new UserInterfaceStack(this, 'LisaUserInterface', { - ...baseStackProps, - architecture: ARCHITECTURE, - stackName: createCdkId([config.deploymentName, config.appName, 'ui', config.deploymentStage]), - description: `LISA-user-interface: ${config.deploymentName}-${config.deploymentStage}`, - restApiId: apiBaseStack.restApiId, - rootResourceId: apiBaseStack.rootResourceId, - }); - uiStack.addDependency(chatStack); - uiStack.addDependency(serveStack); - uiStack.addDependency(apiBaseStack); - apiDeploymentStack.addDependency(uiStack); - stacks.push(uiStack); - - if (config.deployRag) { - const ragStack = new LisaRagStack(this, 'LisaRAG', { + if (config.deployChat) { + const chatStack = new LisaChatApplicationStack(this, 'LisaChat', { ...baseStackProps, authorizer: apiBaseStack.authorizer, - description: `LISA-rag: ${config.deploymentName}-${config.deploymentStage}`, - endpointUrl: serveStack.endpointUrl, - modelsPs: serveStack.modelsPs, + stackName: createCdkId([config.deploymentName, config.appName, 'chat', 
config.deploymentStage]), + description: `LISA-chat: ${config.deploymentName}-${config.deploymentStage}`, restApiId: apiBaseStack.restApiId, rootResourceId: apiBaseStack.rootResourceId, - stackName: createCdkId([config.deploymentName, config.appName, 'rag', config.deploymentStage]), vpc: networkingStack.vpc, }); - ragStack.addDependency(coreStack); - ragStack.addDependency(iamStack); - ragStack.addDependency(apiBaseStack); - stacks.push(ragStack); - - if (config.deployRag) { - uiStack.addDependency(ragStack); - apiDeploymentStack.addDependency(ragStack); + chatStack.addDependency(apiBaseStack); + chatStack.addDependency(coreStack); + apiDeploymentStack.addDependency(chatStack); + stacks.push(chatStack); + + if (config.deployUi) { + const uiStack = new UserInterfaceStack(this, 'LisaUserInterface', { + ...baseStackProps, + architecture: ARCHITECTURE, + stackName: createCdkId([config.deploymentName, config.appName, 'ui', config.deploymentStage]), + description: `LISA-user-interface: ${config.deploymentName}-${config.deploymentStage}`, + restApiId: apiBaseStack.restApiId, + rootResourceId: apiBaseStack.rootResourceId, + }); + uiStack.addDependency(chatStack); + uiStack.addDependency(serveStack); + uiStack.addDependency(apiBaseStack); + apiDeploymentStack.addDependency(uiStack); + stacks.push(uiStack); + + if (config.deployRag) { + const ragStack = new LisaRagStack(this, 'LisaRAG', { + ...baseStackProps, + authorizer: apiBaseStack.authorizer, + description: `LISA-rag: ${config.deploymentName}-${config.deploymentStage}`, + endpointUrl: serveStack.endpointUrl, + modelsPs: serveStack.modelsPs, + restApiId: apiBaseStack.restApiId, + rootResourceId: apiBaseStack.rootResourceId, + stackName: createCdkId([config.deploymentName, config.appName, 'rag', config.deploymentStage]), + vpc: networkingStack.vpc, + }); + ragStack.addDependency(coreStack); + ragStack.addDependency(iamStack); + ragStack.addDependency(apiBaseStack); + stacks.push(ragStack); + + if (config.deployRag) { + 
uiStack.addDependency(ragStack); + apiDeploymentStack.addDependency(ragStack); + } + } } } + if (config.deployDocs) { + const docsStack = new LisaDocsStack(this, 'LisaDocs', { + ...baseStackProps + }); + stacks.push(docsStack); + } + stacks.push(apiDeploymentStack); // Set resource tags @@ -238,6 +251,14 @@ export class LisaServeApplicationStage extends Stage { }); } + // Run CDK-nag on app if specified + if (config.runCdkNag) { + stacks.forEach((lisaStack) => { + Aspects.of(lisaStack).add(new AwsSolutionsChecks({ reports: true, verbose: true })); + Aspects.of(lisaStack).add(new NIST80053R5Checks({ reports: true, verbose: true })); + }); + } + // Enforce updates to EC2 launch templates Aspects.of(this).add(new UpdateLaunchTemplateMetadataOptions()); } diff --git a/lib/user-interface/index.ts b/lib/user-interface/index.ts index 2c3c1944..0c4ce8a6 100644 --- a/lib/user-interface/index.ts +++ b/lib/user-interface/index.ts @@ -161,8 +161,6 @@ export class UserInterfaceStack extends Stack { }, ); - const litellmModels = config.litellmConfig.model_list ? config.litellmConfig.model_list : []; - // Website bucket deployment // Copy auth and LISA-Serve info to UI deployment bucket @@ -177,15 +175,9 @@ export class UserInterfaceStack extends Stack { createCdkId(['LisaRestApiUri', 'StringParameter']), `${config.deploymentPrefix}/lisaServeRestApiUri`, ).stringValue, - RESTAPI_VERSION: config.restApiConfig.apiVersion, + RESTAPI_VERSION: 'v2', RAG_ENABLED: config.deployRag, - SYSTEM_BANNER: { - text: config.systemBanner?.text, - backgroundColor: config.systemBanner?.backgroundColor, - fontColor: config.systemBanner?.fontColor, - }, API_BASE_URL: config.apiGatewayConfig?.domainName ? 
'/' : `/${config.deploymentStage}/`, - MODELS: litellmModels, }; const appEnvSource = Source.data('env.js', `window.env = ${JSON.stringify(appEnvConfig)}`); diff --git a/lib/user-interface/react/package-lock.json b/lib/user-interface/react/package-lock.json index 3fd22531..a5d4e3af 100644 --- a/lib/user-interface/react/package-lock.json +++ b/lib/user-interface/react/package-lock.json @@ -12,12 +12,13 @@ "@cloudscape-design/component-toolkit": "^1.0.0-beta.65", "@cloudscape-design/components": "^3.0.638", "@cloudscape-design/global-styles": "^1.0.12", - "@langchain/core": "^0.1.22", + "@langchain/core": "^0.3.5", + "@langchain/openai": "^0.3.11", "@microsoft/fetch-event-source": "^2.0.1", "@reduxjs/toolkit": "^1.9.5", "axios": "^1.7.4", "git-repo-info": "^2.1.1", - "langchain": "^0.1.12", + "langchain": "^0.3.5", "lodash": "^4.17.21", "luxon": "^3.4.0", "react": "^18.2.0", @@ -77,30 +78,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@anthropic-ai/sdk": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.9.1.tgz", - "integrity": "sha512-wa1meQ2WSfoY8Uor3EdrJq0jTiZJoKoSii2ZVWRY1oN4Tlr5s59pADg9T79FTbPe1/se5c3pBeZgJL63wmuoBA==", - "dependencies": { - "@types/node": "^18.11.18", - "@types/node-fetch": "^2.6.4", - "abort-controller": "^3.0.0", - "agentkeepalive": "^4.2.1", - "digest-fetch": "^1.3.0", - "form-data-encoder": "1.7.2", - "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7", - "web-streams-polyfill": "^3.2.1" - } - }, - "node_modules/@anthropic-ai/sdk/node_modules/@types/node": { - "version": "18.19.14", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.14.tgz", - "integrity": "sha512-EnQ4Us2rmOS64nHDWr0XqAD8DsO6f3XR6lf9UIIrZQpUzPVdN/oPuEzfDWNHSyXLvoGgjuEm/sPwFGSSs35Wtg==", - "dependencies": { - "undici-types": "~5.26.4" - } - }, "node_modules/@babel/runtime": { "version": "7.22.6", "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.22.6.tgz", @@ -764,9 +741,9 
@@ } }, "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.4.15", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", - "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==" }, "node_modules/@jridgewell/trace-mapping": { "version": "0.3.25", @@ -782,364 +759,21 @@ "resolved": "https://registry.npmjs.org/@juggle/resize-observer/-/resize-observer-3.4.0.tgz", "integrity": "sha512-dfLbk+PwWvFzSxwk3n5ySL0hfBog779o8h68wK/7/APo/7cgyWp5jcXockbxdk5kFRkbeXWm4Fbi9FrdN381sA==" }, - "node_modules/@langchain/community": { - "version": "0.0.26", - "resolved": "https://registry.npmjs.org/@langchain/community/-/community-0.0.26.tgz", - "integrity": "sha512-E5/lltEkkRCxA9WQ/IpdTWUBj5gaCOYuf6r2MX4ZNTR5gfaZkHdLQWF1rew6uG3Z7XjRMMtIxxT9jS7me6sRRA==", - "dependencies": { - "@langchain/core": "~0.1.16", - "@langchain/openai": "~0.0.10", - "flat": "^5.0.2", - "langsmith": "~0.0.48", - "uuid": "^9.0.0", - "zod": "^3.22.3" - }, - "engines": { - "node": ">=18" - }, - "peerDependencies": { - "@aws-crypto/sha256-js": "^5.0.0", - "@aws-sdk/client-bedrock-agent-runtime": "^3.485.0", - "@aws-sdk/client-bedrock-runtime": "^3.422.0", - "@aws-sdk/client-dynamodb": "^3.310.0", - "@aws-sdk/client-kendra": "^3.352.0", - "@aws-sdk/client-lambda": "^3.310.0", - "@aws-sdk/client-sagemaker-runtime": "^3.310.0", - "@aws-sdk/client-sfn": "^3.310.0", - "@aws-sdk/credential-provider-node": "^3.388.0", - "@azure/search-documents": "^12.0.0", - "@clickhouse/client": "^0.2.5", - "@cloudflare/ai": "*", - "@datastax/astra-db-ts": "^0.1.4", - "@elastic/elasticsearch": "^8.4.0", - "@getmetal/metal-sdk": "*", - "@getzep/zep-js": "^0.9.0", - "@gomomento/sdk": "^1.51.1", - 
"@gomomento/sdk-core": "^1.51.1", - "@google-ai/generativelanguage": "^0.2.1", - "@gradientai/nodejs-sdk": "^1.2.0", - "@huggingface/inference": "^2.6.4", - "@mozilla/readability": "*", - "@opensearch-project/opensearch": "*", - "@pinecone-database/pinecone": "*", - "@planetscale/database": "^1.8.0", - "@qdrant/js-client-rest": "^1.2.0", - "@raycast/api": "^1.55.2", - "@rockset/client": "^0.9.1", - "@smithy/eventstream-codec": "^2.0.5", - "@smithy/protocol-http": "^3.0.6", - "@smithy/signature-v4": "^2.0.10", - "@smithy/util-utf8": "^2.0.0", - "@supabase/postgrest-js": "^1.1.1", - "@supabase/supabase-js": "^2.10.0", - "@tensorflow-models/universal-sentence-encoder": "*", - "@tensorflow/tfjs-converter": "*", - "@tensorflow/tfjs-core": "*", - "@upstash/redis": "^1.20.6", - "@vercel/kv": "^0.2.3", - "@vercel/postgres": "^0.5.0", - "@writerai/writer-sdk": "^0.40.2", - "@xata.io/client": "^0.28.0", - "@xenova/transformers": "^2.5.4", - "@zilliz/milvus2-sdk-node": ">=2.2.7", - "cassandra-driver": "^4.7.2", - "chromadb": "*", - "closevector-common": "0.1.0-alpha.1", - "closevector-node": "0.1.0-alpha.10", - "closevector-web": "0.1.0-alpha.16", - "cohere-ai": "*", - "convex": "^1.3.1", - "discord.js": "^14.14.1", - "faiss-node": "^0.5.1", - "firebase-admin": "^11.9.0", - "google-auth-library": "^8.9.0", - "googleapis": "^126.0.1", - "hnswlib-node": "^1.4.2", - "html-to-text": "^9.0.5", - "ioredis": "^5.3.2", - "jsdom": "*", - "llmonitor": "^0.5.9", - "lodash": "^4.17.21", - "lunary": "^0.6.11", - "mongodb": "^5.2.0", - "mysql2": "^3.3.3", - "neo4j-driver": "*", - "node-llama-cpp": "*", - "pg": "^8.11.0", - "pg-copy-streams": "^6.0.5", - "pickleparser": "^0.2.1", - "portkey-ai": "^0.1.11", - "redis": "^4.6.4", - "replicate": "^0.18.0", - "typeorm": "^0.3.12", - "typesense": "^1.5.3", - "usearch": "^1.1.1", - "vectordb": "^0.1.4", - "voy-search": "0.6.2", - "weaviate-ts-client": "^1.4.0", - "web-auth-library": "^1.0.3", - "ws": "^8.14.2" - }, - "peerDependenciesMeta": { - 
"@aws-crypto/sha256-js": { - "optional": true - }, - "@aws-sdk/client-bedrock-agent-runtime": { - "optional": true - }, - "@aws-sdk/client-bedrock-runtime": { - "optional": true - }, - "@aws-sdk/client-dynamodb": { - "optional": true - }, - "@aws-sdk/client-kendra": { - "optional": true - }, - "@aws-sdk/client-lambda": { - "optional": true - }, - "@aws-sdk/client-sagemaker-runtime": { - "optional": true - }, - "@aws-sdk/client-sfn": { - "optional": true - }, - "@aws-sdk/credential-provider-node": { - "optional": true - }, - "@azure/search-documents": { - "optional": true - }, - "@clickhouse/client": { - "optional": true - }, - "@cloudflare/ai": { - "optional": true - }, - "@datastax/astra-db-ts": { - "optional": true - }, - "@elastic/elasticsearch": { - "optional": true - }, - "@getmetal/metal-sdk": { - "optional": true - }, - "@getzep/zep-js": { - "optional": true - }, - "@gomomento/sdk": { - "optional": true - }, - "@gomomento/sdk-core": { - "optional": true - }, - "@google-ai/generativelanguage": { - "optional": true - }, - "@gradientai/nodejs-sdk": { - "optional": true - }, - "@huggingface/inference": { - "optional": true - }, - "@mozilla/readability": { - "optional": true - }, - "@opensearch-project/opensearch": { - "optional": true - }, - "@pinecone-database/pinecone": { - "optional": true - }, - "@planetscale/database": { - "optional": true - }, - "@qdrant/js-client-rest": { - "optional": true - }, - "@raycast/api": { - "optional": true - }, - "@rockset/client": { - "optional": true - }, - "@smithy/eventstream-codec": { - "optional": true - }, - "@smithy/protocol-http": { - "optional": true - }, - "@smithy/signature-v4": { - "optional": true - }, - "@smithy/util-utf8": { - "optional": true - }, - "@supabase/postgrest-js": { - "optional": true - }, - "@supabase/supabase-js": { - "optional": true - }, - "@tensorflow-models/universal-sentence-encoder": { - "optional": true - }, - "@tensorflow/tfjs-converter": { - "optional": true - }, - "@tensorflow/tfjs-core": 
{ - "optional": true - }, - "@upstash/redis": { - "optional": true - }, - "@vercel/kv": { - "optional": true - }, - "@vercel/postgres": { - "optional": true - }, - "@writerai/writer-sdk": { - "optional": true - }, - "@xata.io/client": { - "optional": true - }, - "@xenova/transformers": { - "optional": true - }, - "@zilliz/milvus2-sdk-node": { - "optional": true - }, - "cassandra-driver": { - "optional": true - }, - "chromadb": { - "optional": true - }, - "closevector-common": { - "optional": true - }, - "closevector-node": { - "optional": true - }, - "closevector-web": { - "optional": true - }, - "cohere-ai": { - "optional": true - }, - "convex": { - "optional": true - }, - "discord.js": { - "optional": true - }, - "faiss-node": { - "optional": true - }, - "firebase-admin": { - "optional": true - }, - "google-auth-library": { - "optional": true - }, - "googleapis": { - "optional": true - }, - "hnswlib-node": { - "optional": true - }, - "html-to-text": { - "optional": true - }, - "ioredis": { - "optional": true - }, - "jsdom": { - "optional": true - }, - "llmonitor": { - "optional": true - }, - "lodash": { - "optional": true - }, - "lunary": { - "optional": true - }, - "mongodb": { - "optional": true - }, - "mysql2": { - "optional": true - }, - "neo4j-driver": { - "optional": true - }, - "node-llama-cpp": { - "optional": true - }, - "pg": { - "optional": true - }, - "pg-copy-streams": { - "optional": true - }, - "pickleparser": { - "optional": true - }, - "portkey-ai": { - "optional": true - }, - "redis": { - "optional": true - }, - "replicate": { - "optional": true - }, - "typeorm": { - "optional": true - }, - "typesense": { - "optional": true - }, - "usearch": { - "optional": true - }, - "vectordb": { - "optional": true - }, - "voy-search": { - "optional": true - }, - "weaviate-ts-client": { - "optional": true - }, - "web-auth-library": { - "optional": true - }, - "ws": { - "optional": true - } - } - }, "node_modules/@langchain/core": { - "version": "0.1.22", - 
"resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.1.22.tgz", - "integrity": "sha512-I3KMv87D5AFeAvuJhzaGOYdppFL4h/bRm7LeJfwF2PspQIZwvDE9GP7hkw4n+7jwNaBxjU8ZTj6o3LZAh1R5LQ==", + "version": "0.3.16", + "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.16.tgz", + "integrity": "sha512-g83M2Z1XlhECFUtT4C7XLsVVGt2Hk3Y/KhS5tZSsz+Gqtxwd790/MD7MxdUHpZj0VKkvrFuWARWpJmNKlkiY+g==", + "license": "MIT", "dependencies": { "ansi-styles": "^5.0.0", "camelcase": "6", "decamelize": "1.2.0", - "js-tiktoken": "^1.0.8", - "langsmith": "~0.0.48", - "ml-distance": "^4.0.0", + "js-tiktoken": "^1.0.12", + "langsmith": "^0.2.0", + "mustache": "^4.2.0", "p-queue": "^6.6.2", "p-retry": "4", - "uuid": "^9.0.0", + "uuid": "^10.0.0", "zod": "^3.22.4", "zod-to-json-schema": "^3.22.3" }, @@ -1169,19 +803,35 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@langchain/core/node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/@langchain/openai": { - "version": "0.0.14", - "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.0.14.tgz", - "integrity": "sha512-co6nRylPrLGY/C3JYxhHt6cxLq07P086O7K3QaZH7SFFErIN9wSzJonpvhZR07DEUq6eK6wKgh2ORxA/NcjSRQ==", + "version": "0.3.11", + "resolved": "https://registry.npmjs.org/@langchain/openai/-/openai-0.3.11.tgz", + "integrity": "sha512-mEFbpJ8w8NPArsquUlCwxvZTKNkXxqwzvTEYzv6Jb7gUoBDOZtwLg6AdcngTJ+w5VFh3wxgPy0g3zb9Aw0Qbpw==", + "license": "MIT", "dependencies": { - "@langchain/core": "~0.1.13", - "js-tiktoken": "^1.0.7", - "openai": "^4.26.0", + "js-tiktoken": "^1.0.12", + "openai": "^4.68.0", "zod": "^3.22.4", "zod-to-json-schema": "^3.22.3" }, 
"engines": { "node": ">=18" + }, + "peerDependencies": { + "@langchain/core": ">=0.2.26 <0.4.0" } }, "node_modules/@microsoft/fetch-event-source": { @@ -1604,7 +1254,8 @@ "node_modules/@types/uuid": { "version": "9.0.2", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.2.tgz", - "integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==" + "integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==", + "dev": true }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "6.21.0", @@ -2130,11 +1781,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, - "node_modules/base-64": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz", - "integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==" - }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -2171,11 +1817,6 @@ "node": ">=8" } }, - "node_modules/binary-search": { - "version": "1.3.6", - "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz", - "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==" - }, "node_modules/bplist-parser": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/bplist-parser/-/bplist-parser-0.2.0.tgz", @@ -2338,14 +1979,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/charenc": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", - "integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==", - "engines": { - "node": "*" - } - }, "node_modules/chokidar": { 
"version": "3.5.3", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", @@ -2429,6 +2062,14 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/commander": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", + "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", + "engines": { + "node": ">=14" + } + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2448,14 +2089,6 @@ "node": ">= 8" } }, - "node_modules/crypt": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz", - "integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==", - "engines": { - "node": "*" - } - }, "node_modules/crypto-js": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/crypto-js/-/crypto-js-4.2.0.tgz", @@ -2488,9 +2121,9 @@ } }, "node_modules/csstype": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.2.tgz", - "integrity": "sha512-I7K1Uu0MBPzaFKg4nI5Q7Vs2t+3gWWW648spaF+Rg7pI9ds18Ugn+lvg4SHczUdKlHI5LWBXyqfS8+DufyBsgQ==" + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==" }, "node_modules/d3-path": { "version": "1.0.9", @@ -2653,15 +2286,6 @@ "node": ">=0.3.1" } }, - "node_modules/digest-fetch": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz", - "integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==", - "dependencies": { - "base-64": "^0.1.0", - "md5": "^2.3.0" - } - }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": 
"https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -3202,11 +2826,6 @@ "url": "https://github.com/sindresorhus/execa?sponsor=1" } }, - "node_modules/expr-eval": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz", - "integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg==" - }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -3263,29 +2882,6 @@ "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", "dev": true }, - "node_modules/fast-xml-parser": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz", - "integrity": "sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/NaturalIntelligence" - }, - { - "type": "paypal", - "url": "https://paypal.me/naturalintelligence" - } - ], - "optional": true, - "peer": true, - "dependencies": { - "strnum": "^1.0.5" - }, - "bin": { - "fxparser": "src/cli/cli.js" - } - }, "node_modules/fastparse": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/fastparse/-/fastparse-1.1.2.tgz", @@ -3338,14 +2934,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/flat": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", - "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", - "bin": { - "flat": "cli.js" - } - }, "node_modules/flat-cache": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.0.4.tgz", @@ -3423,14 +3011,6 @@ "node": ">= 12.20" } }, - "node_modules/formdata-node/node_modules/web-streams-polyfill": { - "version": "4.0.0-beta.3", - "resolved": 
"https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", - "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", - "engines": { - "node": ">= 14" - } - }, "node_modules/fraction.js": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.2.0.tgz", @@ -3450,9 +3030,9 @@ "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" }, "node_modules/fsevents": { - "version": "2.3.2", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", - "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", "hasInstallScript": true, "optional": true, "os": [ @@ -3764,7 +3344,7 @@ "version": "5.2.4", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==", - "devOptional": true, + "dev": true, "engines": { "node": ">= 4" } @@ -3847,11 +3427,6 @@ "tslib": "^2.4.0" } }, - "node_modules/is-any-array": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", - "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" - }, "node_modules/is-array-buffer": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.2.tgz", @@ -4218,9 +3793,9 @@ } }, "node_modules/js-tiktoken": { - "version": "1.0.10", - "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.10.tgz", - "integrity": 
"sha512-ZoSxbGjvGyMT13x6ACo9ebhDha/0FHdKA+OsQcMOWcm1Zs7r90Rhk5lhERLzji+3rA7EKpXCgwXcM5fF3DMpdA==", + "version": "1.0.15", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.15.tgz", + "integrity": "sha512-65ruOWWXDEZHHbAo7EjOcNxOGasQKbL4Fq3jEr2xsCqSsoOo6VVSqzWQb6PRIqypFSDcma4jO90YP0w5X8qVXQ==", "dependencies": { "base64-js": "^1.5.1" } @@ -4280,25 +3855,19 @@ "peer": true }, "node_modules/langchain": { - "version": "0.1.12", - "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.1.12.tgz", - "integrity": "sha512-F3WK6KJGeA+gnXIrijKy892yEGzUOpO4pEWWphUrCxrtfjXh1hFcXfj5Oh14qGvaUCmn8ezBqQMJ/LhL6z3DhQ==", - "dependencies": { - "@anthropic-ai/sdk": "^0.9.1", - "@langchain/community": "~0.0.20", - "@langchain/core": "~0.1.16", - "@langchain/openai": "~0.0.12", - "binary-extensions": "^2.2.0", - "expr-eval": "^2.0.2", - "js-tiktoken": "^1.0.7", + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.3.5.tgz", + "integrity": "sha512-Gq0xC45Sq6nszS8kQG9suCrmBsuXH0INMmiF7D2TwPb6mtG35Jiq4grCk9ykpwPsarTHdty3SzUbII/FqiYSSw==", + "dependencies": { + "@langchain/openai": ">=0.1.0 <0.4.0", + "@langchain/textsplitters": ">=0.0.0 <0.2.0", + "js-tiktoken": "^1.0.12", "js-yaml": "^4.1.0", "jsonpointer": "^5.0.1", - "langchainhub": "~0.0.6", - "langsmith": "~0.0.59", - "ml-distance": "^4.0.0", + "langsmith": "^0.2.0", "openapi-types": "^12.1.3", "p-retry": "4", - "uuid": "^9.0.0", + "uuid": "^10.0.0", "yaml": "^2.2.1", "zod": "^3.22.4", "zod-to-json-schema": "^3.22.3" @@ -4307,108 +3876,44 @@ "node": ">=18" }, "peerDependencies": { - "@aws-sdk/client-s3": "^3.310.0", - "@aws-sdk/client-sagemaker-runtime": "^3.310.0", - "@aws-sdk/client-sfn": "^3.310.0", - "@aws-sdk/credential-provider-node": "^3.388.0", - "@azure/storage-blob": "^12.15.0", - "@gomomento/sdk": "^1.51.1", - "@gomomento/sdk-core": "^1.51.1", - "@gomomento/sdk-web": "^1.51.1", - "@google-ai/generativelanguage": "^0.2.1", - "@google-cloud/storage": "^6.10.1", - 
"@notionhq/client": "^2.2.10", - "@pinecone-database/pinecone": "*", - "@supabase/supabase-js": "^2.10.0", - "@vercel/kv": "^0.2.3", - "@xata.io/client": "^0.28.0", - "apify-client": "^2.7.1", - "assemblyai": "^4.0.0", + "@langchain/anthropic": "*", + "@langchain/aws": "*", + "@langchain/cohere": "*", + "@langchain/core": ">=0.2.21 <0.4.0", + "@langchain/google-genai": "*", + "@langchain/google-vertexai": "*", + "@langchain/groq": "*", + "@langchain/mistralai": "*", + "@langchain/ollama": "*", "axios": "*", - "cheerio": "^1.0.0-rc.12", - "chromadb": "*", - "convex": "^1.3.1", - "d3-dsv": "^2.0.0", - "epub2": "^3.0.1", - "fast-xml-parser": "^4.2.7", - "google-auth-library": "^8.9.0", - "googleapis": "^126.0.1", + "cheerio": "*", "handlebars": "^4.7.8", - "html-to-text": "^9.0.5", - "ignore": "^5.2.0", - "ioredis": "^5.3.2", - "jsdom": "*", - "mammoth": "^1.6.0", - "mongodb": "^5.2.0", - "node-llama-cpp": "*", - "notion-to-md": "^3.1.0", - "officeparser": "^4.0.4", - "pdf-parse": "1.1.1", "peggy": "^3.0.2", - "playwright": "^1.32.1", - "puppeteer": "^19.7.2", - "pyodide": "^0.24.1", - "redis": "^4.6.4", - "sonix-speech-recognition": "^2.1.1", - "srt-parser-2": "^1.2.3", - "typeorm": "^0.3.12", - "vectordb": "^0.1.4", - "weaviate-ts-client": "^1.4.0", - "web-auth-library": "^1.0.3", - "ws": "^8.14.2", - "youtube-transcript": "^1.0.6", - "youtubei.js": "^5.8.0" + "typeorm": "*" }, "peerDependenciesMeta": { - "@aws-sdk/client-s3": { - "optional": true - }, - "@aws-sdk/client-sagemaker-runtime": { - "optional": true - }, - "@aws-sdk/client-sfn": { - "optional": true - }, - "@aws-sdk/credential-provider-node": { + "@langchain/anthropic": { "optional": true }, - "@azure/storage-blob": { + "@langchain/aws": { "optional": true }, - "@gomomento/sdk": { + "@langchain/cohere": { "optional": true }, - "@gomomento/sdk-core": { + "@langchain/google-genai": { "optional": true }, - "@gomomento/sdk-web": { + "@langchain/google-vertexai": { "optional": true }, - 
"@google-ai/generativelanguage": { + "@langchain/groq": { "optional": true }, - "@google-cloud/storage": { + "@langchain/mistralai": { "optional": true }, - "@notionhq/client": { - "optional": true - }, - "@pinecone-database/pinecone": { - "optional": true - }, - "@supabase/supabase-js": { - "optional": true - }, - "@vercel/kv": { - "optional": true - }, - "@xata.io/client": { - "optional": true - }, - "apify-client": { - "optional": true - }, - "assemblyai": { + "@langchain/ollama": { "optional": true }, "axios": { @@ -4417,133 +3922,82 @@ "cheerio": { "optional": true }, - "chromadb": { - "optional": true - }, - "convex": { - "optional": true - }, - "d3-dsv": { - "optional": true - }, - "epub2": { - "optional": true - }, - "faiss-node": { - "optional": true - }, - "fast-xml-parser": { - "optional": true - }, - "google-auth-library": { - "optional": true - }, - "googleapis": { - "optional": true - }, "handlebars": { "optional": true }, - "html-to-text": { - "optional": true - }, - "ignore": { - "optional": true - }, - "ioredis": { - "optional": true - }, - "jsdom": { - "optional": true - }, - "mammoth": { - "optional": true - }, - "mongodb": { - "optional": true - }, - "node-llama-cpp": { - "optional": true - }, - "notion-to-md": { - "optional": true - }, - "officeparser": { - "optional": true - }, - "pdf-parse": { - "optional": true - }, "peggy": { "optional": true }, - "playwright": { - "optional": true - }, - "puppeteer": { - "optional": true - }, - "pyodide": { - "optional": true - }, - "redis": { - "optional": true - }, - "sonix-speech-recognition": { - "optional": true - }, - "srt-parser-2": { - "optional": true - }, "typeorm": { "optional": true - }, - "vectordb": { - "optional": true - }, - "weaviate-ts-client": { - "optional": true - }, - "web-auth-library": { - "optional": true - }, - "ws": { - "optional": true - }, - "youtube-transcript": { - "optional": true - }, - "youtubei.js": { - "optional": true } } }, - "node_modules/langchainhub": { - "version": 
"0.0.6", - "resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.6.tgz", - "integrity": "sha512-SW6105T+YP1cTe0yMf//7kyshCgvCTyFBMTgH2H3s9rTAR4e+78DA/BBrUL/Mt4Q5eMWui7iGuAYb3pgGsdQ9w==" + "node_modules/langchain/node_modules/@langchain/textsplitters": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@langchain/textsplitters/-/textsplitters-0.1.0.tgz", + "integrity": "sha512-djI4uw9rlkAb5iMhtLED+xJebDdAG935AdP4eRTB02R7OB/act55Bj9wsskhZsvuyQRpO4O1wQOp85s6T6GWmw==", + "dependencies": { + "js-tiktoken": "^1.0.12" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@langchain/core": ">=0.2.21 <0.4.0" + } + }, + "node_modules/langchain/node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } }, "node_modules/langsmith": { - "version": "0.0.66", - "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.0.66.tgz", - "integrity": "sha512-yextqrwQiN+2Y0WjHEjQmwS9V6886RIuUG8esibiSh6BTHrtt1WMCAPKJIy8E1+HQvVY7IzsuJ4vzpkKi0wcTQ==", + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.2.3.tgz", + "integrity": "sha512-SPMYPVqR9kwXZVmJ2PXC61HeBnXIFHrjfjDxQ14H0+n5p4gqjLzgSHIQyxBlFeWQUQzArJxe65Ap+s+Xo1cZog==", + "license": "MIT", "dependencies": { - "@types/uuid": "^9.0.1", + "@types/uuid": "^10.0.0", "commander": "^10.0.1", "p-queue": "^6.6.2", "p-retry": "4", - "uuid": "^9.0.0" + "semver": "^7.6.3", + "uuid": "^10.0.0" }, - "bin": { - "langsmith": "dist/cli/main.cjs" + "peerDependencies": { + "openai": "*" + }, + "peerDependenciesMeta": { + "openai": { + "optional": true + } } }, - "node_modules/langsmith/node_modules/commander": { - "version": "10.0.1", - "resolved": 
"https://registry.npmjs.org/commander/-/commander-10.0.1.tgz", - "integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==", - "engines": { - "node": ">=14" + "node_modules/langsmith/node_modules/@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", + "license": "MIT" + }, + "node_modules/langsmith/node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" } }, "node_modules/levn": { @@ -4616,18 +4070,6 @@ "loose-envify": "cli.js" } }, - "node_modules/lru-cache": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dev": true, - "dependencies": { - "yallist": "^4.0.0" - }, - "engines": { - "node": ">=10" - } - }, "node_modules/luxon": { "version": "3.4.0", "resolved": "https://registry.npmjs.org/luxon/-/luxon-3.4.0.tgz", @@ -4636,21 +4078,6 @@ "node": ">=12" } }, - "node_modules/md5": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz", - "integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==", - "dependencies": { - "charenc": "0.0.2", - "crypt": "0.0.2", - "is-buffer": "~1.1.6" - } - }, - "node_modules/md5/node_modules/is-buffer": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", - "integrity": 
"sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==" - }, "node_modules/mdast-util-definitions": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz", @@ -5218,46 +4645,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/ml-array-mean": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz", - "integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==", - "dependencies": { - "ml-array-sum": "^1.1.6" - } - }, - "node_modules/ml-array-sum": { - "version": "1.1.6", - "resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz", - "integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==", - "dependencies": { - "is-any-array": "^2.0.0" - } - }, - "node_modules/ml-distance": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.1.tgz", - "integrity": "sha512-feZ5ziXs01zhyFUUUeZV5hwc0f5JW0Sh0ckU1koZe/wdVkJdGxcP06KNQuF0WBTj8FttQUzcvQcpcrOp/XrlEw==", - "dependencies": { - "ml-array-mean": "^1.1.6", - "ml-distance-euclidean": "^2.0.0", - "ml-tree-similarity": "^1.0.0" - } - }, - "node_modules/ml-distance-euclidean": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz", - "integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q==" - }, - "node_modules/ml-tree-similarity": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz", - "integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==", - "dependencies": { - "binary-search": "^1.3.5", - "num-sort": "^2.0.0" - } - }, "node_modules/mnth": { "version": 
"2.0.0", "resolved": "https://registry.npmjs.org/mnth/-/mnth-2.0.0.tgz", @@ -5282,6 +4669,15 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/mustache": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/mustache/-/mustache-4.2.0.tgz", + "integrity": "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==", + "license": "MIT", + "bin": { + "mustache": "bin/mustache" + } + }, "node_modules/mz": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/mz/-/mz-2.7.0.tgz", @@ -5293,9 +4689,9 @@ } }, "node_modules/nanoid": { - "version": "3.3.6", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.6.tgz", - "integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==", + "version": "3.3.7", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", + "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", "funding": [ { "type": "github", @@ -5334,9 +4730,9 @@ } }, "node_modules/node-fetch": { - "version": "2.6.12", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.12.tgz", - "integrity": "sha512-C/fGU2E8ToujUivIO0H+tpQ6HWo4eEmchoPIoXtxCrVghxdKq+QOHqEZW7tuP3KlV3bC8FRMO5nMCC7Zm1VP6g==", + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "dependencies": { "whatwg-url": "^5.0.0" }, @@ -5402,17 +4798,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/num-sort": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz", - "integrity": 
"sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==", - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -5537,28 +4922,34 @@ } }, "node_modules/openai": { - "version": "4.26.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-4.26.0.tgz", - "integrity": "sha512-HPC7tgYdeP38F3uHA5WgnoXZyGbAp9jgcIo23p6It+q/07u4C+NZ8xHKlMShsPbDDmFRpPsa3vdbXYpbhJH3eg==", + "version": "4.68.4", + "resolved": "https://registry.npmjs.org/openai/-/openai-4.68.4.tgz", + "integrity": "sha512-LRinV8iU9VQplkr25oZlyrsYGPGasIwYN8KFMAAFTHHLHjHhejtJ5BALuLFrkGzY4wfbKhOhuT+7lcHZ+F3iEA==", "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", - "digest-fetch": "^1.3.0", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7", - "web-streams-polyfill": "^3.2.1" + "node-fetch": "^2.6.7" }, "bin": { "openai": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.23.8" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } } }, "node_modules/openai/node_modules/@types/node": { - "version": "18.19.14", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.14.tgz", - "integrity": "sha512-EnQ4Us2rmOS64nHDWr0XqAD8DsO6f3XR6lf9UIIrZQpUzPVdN/oPuEzfDWNHSyXLvoGgjuEm/sPwFGSSs35Wtg==", + "version": "18.19.61", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.61.tgz", + "integrity": "sha512-z8fH66NcVkDzBItOao+Nyh0fiy7CYdxIyxnNCcZ60aY0I+EA/y4TSi/S/W9i8DIQvwVo7a0pgzAxmDeNnqrpkw==", "dependencies": { "undici-types": "~5.26.4" } @@ -5715,9 +5106,9 @@ } }, "node_modules/picocolors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", - "integrity": 
"sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" }, "node_modules/picomatch": { "version": "2.3.1", @@ -5739,9 +5130,9 @@ } }, "node_modules/postcss": { - "version": "8.4.31", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", - "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==", + "version": "8.4.47", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", + "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", "funding": [ { "type": "opencollective", @@ -5757,9 +5148,9 @@ } ], "dependencies": { - "nanoid": "^3.3.6", - "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" + "nanoid": "^3.3.7", + "picocolors": "^1.1.0", + "source-map-js": "^1.2.1" }, "engines": { "node": "^10 || ^12 || >=14" @@ -6519,13 +5910,9 @@ } }, "node_modules/semver": { - "version": "7.5.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", - "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", - "dev": true, - "dependencies": { - "lru-cache": "^6.0.0" - }, + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", "bin": { "semver": "bin/semver.js" }, @@ -6585,9 +5972,9 @@ } }, "node_modules/source-map-js": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz", - "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==", + "version": "1.2.1", + "resolved": 
"https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", "engines": { "node": ">=0.10.0" } @@ -6714,13 +6101,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/strnum": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz", - "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==", - "optional": true, - "peer": true - }, "node_modules/style-to-object": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.1.tgz", @@ -7304,6 +6684,7 @@ "version": "9.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", + "dev": true, "bin": { "uuid": "dist/bin/uuid" } @@ -7418,11 +6799,11 @@ } }, "node_modules/web-streams-polyfill": { - "version": "3.3.2", - "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.2.tgz", - "integrity": "sha512-3pRGuxRF5gpuZc0W+EpwQRmCD7gRqcDOMt688KmdlDAgAyaB1XlN0zq2njfDNm44XVdIouE7pZ6GzbdyH47uIQ==", + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", "engines": { - "node": ">= 8" + "node": ">= 14" } }, "node_modules/webidl-conversions": { @@ -7499,34 +6880,6 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, - "node_modules/ws": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", - "integrity": 
"sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", - "optional": true, - "peer": true, - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - }, - "node_modules/yallist": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", - "dev": true - }, "node_modules/yaml": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz", @@ -7548,9 +6901,9 @@ } }, "node_modules/zod": { - "version": "3.22.4", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz", - "integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==", + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/lib/user-interface/react/package.json b/lib/user-interface/react/package.json index adeaa260..61069de6 100644 --- a/lib/user-interface/react/package.json +++ b/lib/user-interface/react/package.json @@ -1,7 +1,7 @@ { "name": "lisa-web", "private": true, - "version": "3.1.0", + "version": "3.2.0", "type": "module", "scripts": { "dev": "vite", @@ -15,12 +15,13 @@ "@cloudscape-design/component-toolkit": "^1.0.0-beta.65", "@cloudscape-design/components": "^3.0.638", "@cloudscape-design/global-styles": "^1.0.12", - "@langchain/core": "^0.1.22", + "@langchain/core": "^0.3.5", + "@langchain/openai": "^0.3.11", "@microsoft/fetch-event-source": "^2.0.1", "@reduxjs/toolkit": "^1.9.5", "axios": "^1.7.4", "git-repo-info": 
"^2.1.1", - "langchain": "^0.1.12", + "langchain": "^0.3.5", "lodash": "^4.17.21", "luxon": "^3.4.0", "react": "^18.2.0", diff --git a/lib/user-interface/react/src/App.tsx b/lib/user-interface/react/src/App.tsx index 3259ced2..f0e6e796 100644 --- a/lib/user-interface/react/src/App.tsx +++ b/lib/user-interface/react/src/App.tsx @@ -30,6 +30,8 @@ import { selectCurrentUserIsAdmin } from './shared/reducers/user.reducer'; import ModelManagement from './pages/ModelManagement'; import NotificationBanner from './shared/notification/notification'; import ConfirmationModal, { ConfirmationModalProps } from './shared/modal/confirmation-modal'; +import Configuration from './pages/Configuration'; +import { useGetConfigurationQuery } from './shared/reducers/configuration.reducer'; const PrivateRoute = ({ children }) => { const auth = useAuth(); @@ -58,6 +60,7 @@ function App () { const [showTools, setShowTools] = useState(false); const [tools, setTools] = useState(null); const confirmationModal: ConfirmationModalProps = useAppSelector((state) => state.modal.confirmationModal); + const { data: config } = useGetConfigurationQuery('global', {refetchOnMountOrArgChange: 5}); useEffect(() => { if (tools) { @@ -70,10 +73,10 @@ function App () { const baseHref = document?.querySelector('base')?.getAttribute('href')?.replace(/\/$/, ''); return ( - {window.env.SYSTEM_BANNER?.text && } + {config && config[0]?.configuration.systemBanner.isEnabled && }
@@ -113,11 +116,19 @@ function App () { } /> + + + + } + /> } /> {confirmationModal && } - {window.env.SYSTEM_BANNER?.text && } + {config && config[0]?.configuration.systemBanner.isEnabled && }
); } diff --git a/lib/user-interface/react/src/components/Topbar.tsx b/lib/user-interface/react/src/components/Topbar.tsx index 2759ca14..efd95a68 100644 --- a/lib/user-interface/react/src/components/Topbar.tsx +++ b/lib/user-interface/react/src/components/Topbar.tsx @@ -68,6 +68,16 @@ function Topbar () { utilities={[ ...(isUserAdmin ? [ + { + type: 'button', + variant: 'link', + text: 'Configuration', + disableUtilityCollapse: false, + external: false, + onClick: () => { + navigate('/configuration'); + }, + }, { type: 'button', variant: 'link', diff --git a/lib/user-interface/react/src/components/chatbot/Chat.tsx b/lib/user-interface/react/src/components/chatbot/Chat.tsx index 2406df8e..548efb24 100644 --- a/lib/user-interface/react/src/components/chatbot/Chat.tsx +++ b/lib/user-interface/react/src/components/chatbot/Chat.tsx @@ -62,8 +62,10 @@ import { ContextUploadModal, RagUploadModal } from './FileUploadModals'; import { ChatOpenAI } from '@langchain/openai'; import { useGetAllModelsQuery } from '../../shared/reducers/model-management.reducer'; import { IModel, ModelStatus, ModelType } from '../../shared/model/model-management.model'; +import { useGetConfigurationQuery } from '../../shared/reducers/configuration.reducer'; export default function Chat ({ sessionId }) { + const { data: config } = useGetConfigurationQuery('global', {refetchOnMountOrArgChange: 5}); const [userPrompt, setUserPrompt] = useState(''); const [humanPrefix, setHumanPrefix] = useState('User'); const [aiPrefix, setAiPrefix] = useState('Assistant'); @@ -595,15 +597,15 @@ export default function Chat ({ sessionId }) { }`} actions={ - + {config && config[0]?.configuration.enabledComponents.uploadContextDocs && - - {window.env.RAG_ENABLED && ( + } + {window.env.RAG_ENABLED && config && config[0]?.configuration.enabledComponents.uploadRagDocs && ( - - - Chat history buffer size: - - - setRagTopK(parseInt(detail.selectedOption.value))} - options={oneThroughTenOptions} - /> - - - + {config && 
(config[0]?.configuration.enabledComponents.viewMetaData || + config[0]?.configuration.enabledComponents.editKwargs || + config[0]?.configuration.enabledComponents.editPromptTemplate || + config[0]?.configuration.enabledComponents.editChatHistoryBuffer || + config[0]?.configuration.enabledComponents.editNumOfRagDocument) && + + + {config && config[0]?.configuration.enabledComponents.viewMetaData && setShowMetadata(detail.checked)} checked={showMetadata}> + Show metadata + } + {config && config[0]?.configuration.enabledComponents.editKwargs && } + {config && config[0]?.configuration.enabledComponents.editPromptTemplate && } + {config && config[0]?.configuration.enabledComponents.editChatHistoryBuffer && <> + Chat history buffer size: + + + setRagTopK(parseInt(detail.selectedOption.value))} + options={oneThroughTenOptions} + /> + } + + } diff --git a/lib/user-interface/react/src/components/chatbot/Sessions.tsx b/lib/user-interface/react/src/components/chatbot/Sessions.tsx index ca568f43..648a5787 100644 --- a/lib/user-interface/react/src/components/chatbot/Sessions.tsx +++ b/lib/user-interface/react/src/components/chatbot/Sessions.tsx @@ -28,8 +28,10 @@ import { useCollection } from '@cloudscape-design/collection-hooks'; import { v4 as uuidv4 } from 'uuid'; import { LisaChatSession } from '../types'; import { listSessions, deleteSession, deleteUserSessions } from '../utils'; +import { useGetConfigurationQuery } from '../../shared/reducers/configuration.reducer'; export function Sessions () { + const { data: config } = useGetConfigurationQuery('global', {refetchOnMountOrArgChange: 5}); const auth = useAuth(); const [sessions, setSessions] = useState([]); const [isLoading, setIsLoading] = useState(true); @@ -115,9 +117,10 @@ export function Sessions () { + {config && config[0]?.configuration.enabledComponents.deleteSessionHistory && + } ), minWidth: 170, @@ -139,6 +142,7 @@ export function Sessions () { > Refresh + {config && 
config[0].configuration.enabledComponents.deleteSessionHistory && + } } diff --git a/lib/user-interface/react/src/components/configuration/ActivatedUserComponents.tsx b/lib/user-interface/react/src/components/configuration/ActivatedUserComponents.tsx new file mode 100644 index 00000000..eeac84b5 --- /dev/null +++ b/lib/user-interface/react/src/components/configuration/ActivatedUserComponents.tsx @@ -0,0 +1,79 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +import { + Container, + Grid, + Header, + SpaceBetween, + Toggle, + Box +} from '@cloudscape-design/components'; +import React from 'react'; +import {SetFieldsFunction} from '../../shared/validation'; + +const configurableOperations = { + deleteSessionHistory: 'Delete Session History', + viewMetaData: 'View Chat Meta Data', + editKwargs: 'Edit Kwargs', + editPromptTemplate: 'Update Prompt Template', + editNumOfRagDocument: 'Edit Number of RAG documents', + editChatHistoryBuffer: 'Edit Chat History Buffer', + uploadRagDocs: 'Upload documents to RAG', + uploadContextDocs: 'Upload documents to context', +}; + +export type ActivatedComponentConfigurationProps = { + setFields: SetFieldsFunction; + enabledComponents: {[key: string]: boolean}; +}; + +export function ActivatedUserComponents (props: ActivatedComponentConfigurationProps) { + return ( + + Activated Chat UI Components + + }> + + ({colspan: 3}))}> + {Object.keys(configurableOperations).map((operation) => { + return ( + + + { + const updatedField = {}; + updatedField[`enabledComponents.${operation}`] = detail.checked; + props.setFields(updatedField); + }} + checked={props.enabledComponents[operation]} + data-cy={`Toggle-${operation}`} + > + + +

{configurableOperations[operation]}

+
+ ); + })} +
+
+
+ ); +} + +export default ActivatedUserComponents; diff --git a/lib/user-interface/react/src/components/configuration/ConfigurationComponent.tsx b/lib/user-interface/react/src/components/configuration/ConfigurationComponent.tsx new file mode 100644 index 00000000..b805c149 --- /dev/null +++ b/lib/user-interface/react/src/components/configuration/ConfigurationComponent.tsx @@ -0,0 +1,165 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +import _ from 'lodash'; +import React, { ReactElement, useEffect, useMemo } from 'react'; +import ActivatedUserComponents from './ActivatedUserComponents'; +import SystemBannerConfiguration from './SystemBannerConfiguration'; +import { scrollToInvalid, useValidationReducer } from '../../shared/validation'; +import { IConfiguration, SystemConfiguration, SystemConfigurationSchema } from '../../shared/model/configuration.model'; +import SpaceBetween from '@cloudscape-design/components/space-between'; +import { Button, Header } from '@cloudscape-design/components'; +import { useGetConfigurationQuery, useUpdateConfigurationMutation } from '../../shared/reducers/configuration.reducer'; +import { useAppDispatch, useAppSelector } from '../../config/store'; +import { selectCurrentUsername } from '../../shared/reducers/user.reducer'; +import { getJsonDifference } from '../../shared/util/utils'; +import { setConfirmationModal } from '../../shared/reducers/modal.reducer'; +import { useNotificationService } from '../../shared/util/hooks'; + +export type ConfigState = { + validateAll: boolean; + form: SystemConfiguration; + touched: any; + formSubmitting: boolean; +}; + +export function ConfigurationComponent () : ReactElement { + const dispatch = useAppDispatch(); + const notificationService = useNotificationService(dispatch); + const { data: config, isFetching: isFetchingConfig } = useGetConfigurationQuery('global', {refetchOnMountOrArgChange: true}); + const [ + updateConfigMutation, + { isSuccess: isUpdateSuccess, isError: isUpdateError, error: updateError, isLoading: isUpdating, reset: resetUpdate }, + ] = useUpdateConfigurationMutation(); + const initialForm = SystemConfigurationSchema.parse({}); + const currentUsername = useAppSelector(selectCurrentUsername); + const { state, setState, setFields, touchFields, errors, isValid } = useValidationReducer(SystemConfigurationSchema, { + validateAll: false as boolean, + touched: {}, + formSubmitting: false as boolean, + 
form: { + ...initialForm + }, + } as ConfigState); + + /** + * Converts a JSON object into an outline structure represented as React nodes. + * + * @param {object} [json={}] - The JSON object to be converted. + * @returns {React.ReactNode[]} - An array of React nodes representing the outline structure. + */ + function jsonToOutline (json = {}) { + const output: React.ReactNode[] = []; + + for (const key in json) { + const value = json[key]; + output.push((
  • {_.startCase(key)}{_.isPlainObject(value) ? '' : `: ${value}`}

  • )); + + if (_.isPlainObject(value)) { + const recursiveJson = jsonToOutline(value); // recursively call + output.push((recursiveJson)); + } + } + return
      {output}
    ; + } + + const changesDiff = useMemo(() => { + return getJsonDifference(config && config[0] ? config[0].configuration : initialForm, state.form); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [initialForm, state.form]); + + useEffect(() => { + if (!isFetchingConfig && config != null) { + setState({ + ...state, + form: { + ...config[0]?.configuration + } + }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [config, isFetchingConfig]); + + useEffect(() => { + if (!isUpdating && isUpdateSuccess) { + notificationService.generateNotification('Successfully updated configuration', 'success'); + resetUpdate(); + } else if (!isUpdating && isUpdateError) { + notificationService.generateNotification(`Error updating config: ${updateError.data?.message ?? updateError.data}`, 'error'); + resetUpdate(); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isUpdateSuccess, isUpdating, isUpdateError, updateError]); + + function handleSubmit () { + if (isValid && !_.isEmpty(changesDiff)) { + const toSubmit: IConfiguration = { + configuration: state.form, + configScope: 'global', + versionId: Number(config[0]?.versionId) + 1, + changedBy: currentUsername ?? 'Admin', + changeReason: `Changes to: ${Object.keys(changesDiff)}` + }; + dispatch( + setConfirmationModal({ + action: 'Update', + resourceName: 'Configuration', + onConfirm: () => updateConfigMutation(toSubmit), + description: _.isEmpty(changesDiff) ?

    No changes detected

    : jsonToOutline(changesDiff), + })); + } + } + + return ( + +
    + LISA App Configuration +
    + + + + + +
    + ); +} + +export default ConfigurationComponent; diff --git a/lib/user-interface/react/src/components/configuration/SystemBannerConfiguration.tsx b/lib/user-interface/react/src/components/configuration/SystemBannerConfiguration.tsx new file mode 100644 index 00000000..f5507c6a --- /dev/null +++ b/lib/user-interface/react/src/components/configuration/SystemBannerConfiguration.tsx @@ -0,0 +1,123 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +import { + Box, + Container, + FormField, + Grid, + Header, + Input, + SpaceBetween, + Toggle +} from '@cloudscape-design/components'; +import React from 'react'; +import { SetFieldsFunction, TouchFieldsFunction } from '../../shared/validation'; + +export type SystemBannerConfigurationProps = { + setFields: SetFieldsFunction; + textColor: string; + backgroundColor: string; + text: string; + isEnabled: boolean; + touchFields: TouchFieldsFunction; + errors: any; +}; + +export function SystemBannerConfiguration (props: SystemBannerConfigurationProps) { + return ( + + System Banner + + }> + + + + + { + props.setFields({'systemBanner.isEnabled': detail.checked}); + }} + checked={props.isEnabled!} + > + + +

    Activate System Banner

    +
    + + + + + props.setFields({'systemBanner.textColor': event.target.value}) + } + value={props.textColor} + disabled={!props.isEnabled} + style={{ + border: '2px solid #7F8897', + borderRadius: '6px', + padding: '3px' + }} + /> + +

    Text Color

    +
    +
    + + + + + props.setFields({'systemBanner.backgroundColor': event.target.value}) + } + value={props.backgroundColor} + disabled={!props.isEnabled} + style={{ + border: '2px solid #7F8897', + borderRadius: '6px', + padding: '3px' + }} + /> + +

    Background Color

    +
    +
    +
    + + { + props.setFields({'systemBanner.text': detail.value}); + }} + onBlur={() => props.touchFields(['systemBanner.text'])} + value={props.text} + placeholder='Enter system banner text' + disabled={!props.isEnabled} + /> + +
    +
    + ); +} + +export default SystemBannerConfiguration; diff --git a/lib/user-interface/react/src/components/model-management/create-model/AutoScalingConfig.tsx b/lib/user-interface/react/src/components/model-management/create-model/AutoScalingConfig.tsx index c8968cd3..cfcc4f3f 100644 --- a/lib/user-interface/react/src/components/model-management/create-model/AutoScalingConfig.tsx +++ b/lib/user-interface/react/src/components/model-management/create-model/AutoScalingConfig.tsx @@ -35,6 +35,14 @@ export function AutoScalingConfig (props: AutoScalingConfigProps) : ReactElement
    Auto Scaling Capacity
    } > + + + props.touchFields(['autoScalingConfig.blockDeviceVolumeSize'])} disabled={props.isEdit} onChange={({ detail }) => { + props.setFields({ 'autoScalingConfig.blockDeviceVolumeSize': Number(detail.value) }); + }}/> + GBs + + props.touchFields(['autoScalingConfig.minCapacity'])} onChange={({ detail }) => { diff --git a/lib/user-interface/react/src/components/model-management/create-model/BaseModelConfig.tsx b/lib/user-interface/react/src/components/model-management/create-model/BaseModelConfig.tsx index 2d80b425..54420b5b 100644 --- a/lib/user-interface/react/src/components/model-management/create-model/BaseModelConfig.tsx +++ b/lib/user-interface/react/src/components/model-management/create-model/BaseModelConfig.tsx @@ -22,12 +22,15 @@ import Toggle from '@cloudscape-design/components/toggle'; import Select from '@cloudscape-design/components/select'; import { IModelRequest, InferenceContainer, ModelType } from '../../../shared/model/model-management.model'; import { Grid, SpaceBetween } from '@cloudscape-design/components'; +import { useGetInstancesQuery } from '../../../shared/reducers/model-management.reducer'; export type BaseModelConfigCustomProps = { isEdit: boolean }; export function BaseModelConfig (props: FormProps & BaseModelConfigCustomProps) : ReactElement { + const {data: instances, isLoading: isLoadingInstances} = useGetInstancesQuery(); + return ( @@ -68,9 +71,19 @@ export function BaseModelConfig (props: FormProps & BaseModelConf /> - props.touchFields(['instanceType'])} onChange={({ detail }) => { - props.setFields({ 'instanceType': detail.value }); - }} disabled={props.isEdit} placeholder='g5.xlarge'/> + ( schema: z.AnyZodObject | z.ZodEff export function CreateModelModal (props: CreateModelModalProps) : ReactElement { const [ createModelMutation, - { isSuccess: isCreateSuccess, isError: isCreateError, error: createError, isLoading: isCreating }, + { isSuccess: isCreateSuccess, isError: isCreateError, error: createError, isLoading: 
isCreating, reset: resetCreate }, ] = useCreateModelMutation(); const [ updateModelMutation, - { isSuccess: isUpdateSuccess, isError: isUpdateError, error: updateError, isLoading: isUpdating }, + { isSuccess: isUpdateSuccess, isError: isUpdateError, error: updateError, isLoading: isUpdating, reset: resetUpdate }, ] = useUpdateModelMutation(); const initialForm = { ...getDefaults(ModelRequestSchema), @@ -131,40 +133,8 @@ export function CreateModelModal (props: CreateModelModalProps) : ReactElement { }, activeStepIndex: 0, }, ModifyMethod.Set); - } - - /** - * Computes the difference between two JSON objects, recursively. - * - * This function takes two JSON objects as input and returns a new object that - * contains the differences between the two. Works with nested objects. - * - * @param {object} [obj1={}] - The first JSON object to compare. - * @param {object} [obj2={}] - The second JSON object to compare. - * @returns {object} - A new object containing the differences between the two input objects. - */ - function getJsonDifference (obj1 = {}, obj2 = {}) { - const output = {}, - merged = { ...obj1, ...obj2 }; // has properties of both - - for (const key in merged) { - const value1 = obj1 && Object.keys(obj1).includes(key) ? obj1[key] : undefined; - const value2 = obj2 && Object.keys(obj2).includes(key) ? obj2[key] : undefined; - - if (_.isPlainObject(value1) || _.isPlainObject(value2)) { - const value = getJsonDifference(value1, value2); // recursively call - if (Object.keys(value).length !== 0) { - output[key] = value; - } - - } else { - if (!_.isEqual(value1, value2) && (value1 || value2)) { - output[key] = value2; - // output[key][value2] = value2. 
- } - } - } - return output; + resetCreate(); + resetUpdate(); } const changesDiff = useMemo(() => { @@ -179,9 +149,11 @@ export function CreateModelModal (props: CreateModelModalProps) : ReactElement { function handleSubmit () { delete toSubmit.lisaHostedModel; if (isValid && !props.isEdit && !_.isEmpty(changesDiff)) { + resetCreate(); createModelMutation(toSubmit); } else if (isValid && props.isEdit && !_.isEmpty(changesDiff)) { // pick only the values we care about + resetUpdate(); updateModelMutation(_.mapKeys(_.pick({...changesDiff, modelId: props.selectedItems[0].modelId}, [ 'modelId', 'streaming', @@ -226,11 +198,9 @@ export function CreateModelModal (props: CreateModelModalProps) : ReactElement { props.setVisible(false); props.setIsEdit(false); resetState(); - } else if (!isCreating && isCreateError) { - notificationService.generateNotification(`Error creating model: ${createError.data.message ?? createError.data}`, 'error'); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isCreateError, createError, isCreating, isCreateSuccess]); + }, [isCreating, isCreateSuccess]); useEffect(() => { if (!isUpdating && isUpdateSuccess) { @@ -239,11 +209,33 @@ export function CreateModelModal (props: CreateModelModalProps) : ReactElement { props.setIsEdit(false); props.setSelectedItems([]); resetState(); - } else if (!isUpdating && isUpdateError) { - notificationService.generateNotification(`Error updating model: ${updateError.data.message ?? 
updateError.data}`, 'error'); } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isUpdateError, updateError, isUpdating, isUpdateSuccess]); + }, [isUpdating, isUpdateSuccess]); + + const normalizeError = (error: SerializedError | {status: string, data: any}): SerializedError | undefined => { + // type predicate to help discriminate between types + function isResponseError (responseError: SerializedError | T): responseError is T { + return (responseError as T)?.status !== undefined; + } + + if (error !== undefined) { + if (isResponseError(error)) { + return { + name: 'Model Error', + message: error.status + }; + } else if (error) { + return { + name: error?.name || 'Model Error', + message: error?.message + }; + } + } + + return undefined; + }; + const reviewError = normalizeError(isCreateError ? createError : isUpdateError ? updateError : undefined); const steps = [ { @@ -280,7 +272,7 @@ export function CreateModelModal (props: CreateModelModalProps) : ReactElement { title: `Review and ${props.isEdit ? 'Update' : 'Create'}`, description: `Review configuration ${props.isEdit ? 
'changes' : ''} prior to submitting.`, content: ( - + ), onEdit: state.form.lisaHostedModel } diff --git a/lib/user-interface/react/src/components/model-management/create-model/ReviewModelChanges.tsx b/lib/user-interface/react/src/components/model-management/create-model/ReviewModelChanges.tsx index bdea71b8..2bcfb8f7 100644 --- a/lib/user-interface/react/src/components/model-management/create-model/ReviewModelChanges.tsx +++ b/lib/user-interface/react/src/components/model-management/create-model/ReviewModelChanges.tsx @@ -16,11 +16,13 @@ import React, { ReactElement } from 'react'; import _ from 'lodash'; -import { SpaceBetween, TextContent } from '@cloudscape-design/components'; +import { Alert, SpaceBetween, TextContent } from '@cloudscape-design/components'; import Container from '@cloudscape-design/components/container'; +import { SerializedError } from '@reduxjs/toolkit'; export type ReviewModelChangesProps = { - jsonDiff: object + jsonDiff: object, + error?: SerializedError }; export function ReviewModelChanges (props: ReviewModelChangesProps) : ReactElement { @@ -35,7 +37,7 @@ export function ReviewModelChanges (props: ReviewModelChangesProps) : ReactEleme for (const key in json) { const value = json[key]; - output.push((
  • {_.startCase(key)}{_.isPlainObject(value) ? '' : `: ${_.startCase(value)}`}

  • )); + output.push((
  • {_.startCase(key)}{_.isPlainObject(value) ? '' : `: ${value}`}

  • )); if (_.isPlainObject(value)) { const recursiveJson = jsonToOutline(value); // recursively call @@ -52,6 +54,14 @@ export function ReviewModelChanges (props: ReviewModelChangesProps) : ReactEleme {_.isEmpty(props.jsonDiff) ?

    No changes detected

    : jsonToOutline(props.jsonDiff)} + + { props?.error && + { props?.error?.message } + }
    ); } diff --git a/lib/user-interface/react/src/components/system-banner/system-banner.tsx b/lib/user-interface/react/src/components/system-banner/system-banner.tsx index b53aa827..11ad3f56 100644 --- a/lib/user-interface/react/src/components/system-banner/system-banner.tsx +++ b/lib/user-interface/react/src/components/system-banner/system-banner.tsx @@ -16,20 +16,22 @@ import { TextContent } from '@cloudscape-design/components'; import React from 'react'; +import { useGetConfigurationQuery } from '../../shared/reducers/configuration.reducer'; type BannerOptions = { position: 'TOP' | 'BOTTOM'; }; export const SystemBanner = ({ position }: BannerOptions) => { + const { data: config } = useGetConfigurationQuery('global', {refetchOnMountOrArgChange: 5}); const bannerStyle: React.CSSProperties = { width: '100%', position: 'fixed', zIndex: 4999, textAlign: 'center', padding: '2px 0px', - backgroundColor: window.env.SYSTEM_BANNER.backgroundColor, - color: window.env.SYSTEM_BANNER.fontColor, + backgroundColor: config[0]?.configuration.systemBanner.backgroundColor, + color: config[0]?.configuration.systemBanner.textColor, }; if (position === 'TOP') { @@ -41,7 +43,7 @@ export const SystemBanner = ({ position }: BannerOptions) => { return (
    - {window.env.SYSTEM_BANNER.text} + {config[0]?.configuration.systemBanner.text}
    ); diff --git a/lib/user-interface/react/src/components/utils.ts b/lib/user-interface/react/src/components/utils.ts index fea0b96a..3dd1a060 100644 --- a/lib/user-interface/react/src/components/utils.ts +++ b/lib/user-interface/react/src/components/utils.ts @@ -20,9 +20,7 @@ import { PutSessionRequestBody, LisaChatMessage, Repository, - ModelTypes, Model, - DescribeModelsResponseBody, } from './types'; const stripTrailingSlash = (str) => { @@ -167,32 +165,6 @@ export const deleteUserSessions = async (idToken: string) => { return await resp.json(); }; -/** - * Describes all models of a given type which are available to a user - * @param modelType model type we are requesting - * @returns - */ -export const describeModels = async (idToken: string, modelType: ModelTypes): Promise => { - const resp = await sendAuthenticatedRequest(`${RESTAPI_URI}/${RESTAPI_VERSION}/serve/models`, 'GET', idToken); - const modelResponse = (await resp.json()) as DescribeModelsResponseBody; - - return modelResponse.data - .filter((openAiModel) => { - const configModelMatch = window.env.MODELS.filter((configModel) => configModel.model === openAiModel.id)[0]; - if (!configModelMatch || configModelMatch.modelType === modelType) { - return true; - } - }) - .map((openAiModel) => { - const configModelMatch = window.env.MODELS.filter((configModel) => configModel.model === openAiModel.id)[0]; - return { - id: openAiModel.id, - streaming: configModelMatch?.streaming, - modelType: configModelMatch?.modelType, - }; - }); -}; - /** * Returns true or false based on the model health status * @param idToken the user's ID token from authenticating diff --git a/lib/user-interface/react/src/main.tsx b/lib/user-interface/react/src/main.tsx index 039ff81d..d95a298d 100644 --- a/lib/user-interface/react/src/main.tsx +++ b/lib/user-interface/react/src/main.tsx @@ -21,7 +21,6 @@ import './index.css'; import AppConfigured from './components/app-configured'; import '@cloudscape-design/global-styles/index.css'; 
-import { ModelTypes } from './components/types'; import getStore from './config/store'; declare global { @@ -37,18 +36,6 @@ declare global { RESTAPI_VERSION: string; RAG_ENABLED: boolean; API_BASE_URL: string; - SYSTEM_BANNER?: { - text: string; - backgroundColor: string; - fontColor: string; - }; - MODELS: [ - { - model: string; - streaming: boolean | null; - modelType: ModelTypes; - }, - ]; }; gitInfo?: { revisionTag?: string; diff --git a/lib/user-interface/react/src/pages/Configuration.tsx b/lib/user-interface/react/src/pages/Configuration.tsx new file mode 100644 index 00000000..91bdfc99 --- /dev/null +++ b/lib/user-interface/react/src/pages/Configuration.tsx @@ -0,0 +1,28 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +import { ReactElement, useEffect } from 'react'; +import ConfigurationComponent from '../components/configuration/ConfigurationComponent'; + +export function Configuration ({ setTools }): ReactElement { + useEffect(() => { + setTools(null); + }, [setTools]); + + return ; +} + +export default Configuration; diff --git a/lib/user-interface/react/src/shared/modal/confirmation-modal.tsx b/lib/user-interface/react/src/shared/modal/confirmation-modal.tsx index b1102659..af0f16c5 100644 --- a/lib/user-interface/react/src/shared/modal/confirmation-modal.tsx +++ b/lib/user-interface/react/src/shared/modal/confirmation-modal.tsx @@ -27,7 +27,7 @@ export type ConfirmationModalProps = { resourceName: string; onConfirm: () => MutationActionCreatorResult; postConfirm?: CallbackFunction; - description?: string; + description?: string | ReactElement; disabled?: boolean; }; diff --git a/lib/user-interface/react/src/shared/model/configuration.model.ts b/lib/user-interface/react/src/shared/model/configuration.model.ts new file mode 100644 index 00000000..be144edc --- /dev/null +++ b/lib/user-interface/react/src/shared/model/configuration.model.ts @@ -0,0 +1,77 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ +import { z } from 'zod'; + +export type SystemConfiguration = { + systemBanner: ISystemBannerConfiguration, + enabledComponents: IEnabledComponents +}; + +export type IEnabledComponents = { + deleteSessionHistory: boolean; + viewMetaData: boolean; + editKwargs: boolean; + editPromptTemplate: boolean; + editNumOfRagDocument: boolean; + editChatHistoryBuffer: boolean; + uploadRagDocs: boolean; + uploadContextDocs: boolean; +}; + +export type ISystemBannerConfiguration = { + isEnabled: boolean; + text: string; + textColor: string; + backgroundColor: string; +}; + +export type BaseConfiguration = { + configScope: string; + versionId: number; + createdAt?: number; + changedBy: string; + changeReason: string; +}; + +export type IConfiguration = BaseConfiguration & { + configuration: SystemConfiguration; +}; + +export const systemBannerConfigSchema = z.object({ + isEnabled: z.boolean().default(false), + text: z.string().default(''), + textColor: z.string().default(''), + backgroundColor: z.string().default(''), +}).refine((data) => !data.isEnabled || (data.isEnabled && data.text.length >= 1), { + message: 'Text is required when banner is activated.', + path: ['text'] +}); + +export const enabledComponentsSchema = z.object({ + deleteSessionHistory: z.boolean().default(true), + viewMetaData: z.boolean().default(true), + editKwargs: z.boolean().default(true), + editPromptTemplate: z.boolean().default(true), + editChatHistoryBuffer: z.boolean().default(true), + editNumOfRagDocument: z.boolean().default(true), + uploadRagDocs: z.boolean().default(true), + uploadContextDocs: z.boolean().default(true), +}); + +export const SystemConfigurationSchema = z.object({ + systemBanner: systemBannerConfigSchema.default(systemBannerConfigSchema.parse({})), + enabledComponents: enabledComponentsSchema.default(enabledComponentsSchema.parse({})), +}); diff --git a/lib/user-interface/react/src/shared/model/model-management.model.ts 
b/lib/user-interface/react/src/shared/model/model-management.model.ts index 4b52969e..0721681c 100644 --- a/lib/user-interface/react/src/shared/model/model-management.model.ts +++ b/lib/user-interface/react/src/shared/model/model-management.model.ts @@ -72,6 +72,7 @@ export type ILoadBalancerConfig = { }; export type IAutoScalingConfig = { + blockDeviceVolumeSize: number; minCapacity: number; maxCapacity: number; desiredCapacity?: number; @@ -161,6 +162,7 @@ export const loadBalancerConfigSchema = z.object({ }); export const autoScalingConfigSchema = z.object({ + blockDeviceVolumeSize: z.number().min(30).default(30), minCapacity: z.number().min(1).default(1), maxCapacity: z.number().min(1).default(1), desiredCapacity: z.number().optional(), diff --git a/lib/user-interface/react/src/shared/reducers/configuration.reducer.ts b/lib/user-interface/react/src/shared/reducers/configuration.reducer.ts new file mode 100644 index 00000000..14d17561 --- /dev/null +++ b/lib/user-interface/react/src/shared/reducers/configuration.reducer.ts @@ -0,0 +1,52 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ */ + +import { createApi } from '@reduxjs/toolkit/query/react'; +import { lisaBaseQuery } from './reducer.utils'; +import { IConfiguration } from '../model/configuration.model'; + +export const configurationApi = createApi({ + reducerPath: 'configuration', + baseQuery: lisaBaseQuery(), + endpoints: (builder) => ({ + getConfiguration: builder.query({ + query: (configScope) => ({ + url: `/configuration?configScope=${configScope}` + }), + providesTags:['configuration'], + }), + updateConfiguration: builder.mutation({ + query: (updatedConfig) => ({ + url: `/configuration/${updatedConfig.configScope}`, + method: 'PUT', + data: updatedConfig + }), + transformErrorResponse: (baseQueryReturnValue) => { + // transform into SerializedError + return { + name: 'Update Configuration Error', + message: baseQueryReturnValue.data?.type === 'RequestValidationError' ? baseQueryReturnValue.data.detail.map((error) => error.msg).join(', ') : baseQueryReturnValue.data.message + }; + }, + invalidatesTags: ['configuration'], + }), + }), +}); + +export const { + useGetConfigurationQuery, + useUpdateConfigurationMutation +} = configurationApi; diff --git a/lib/user-interface/react/src/shared/reducers/index.ts b/lib/user-interface/react/src/shared/reducers/index.ts index 707d2f5e..f2231a4d 100644 --- a/lib/user-interface/react/src/shared/reducers/index.ts +++ b/lib/user-interface/react/src/shared/reducers/index.ts @@ -20,14 +20,16 @@ import userReducer from './user.reducer'; import notificationReducer from './notification.reducer'; import modalReducer from './modal.reducer'; import { modelManagementApi } from './model-management.reducer'; +import { configurationApi } from './configuration.reducer'; const rootReducer: ReducersMapObject = { user: userReducer, notification: notificationReducer, modal: modalReducer, [modelManagementApi.reducerPath]: modelManagementApi.reducer, + [configurationApi.reducerPath]: configurationApi.reducer, }; -export const rootMiddleware = 
[modelManagementApi.middleware]; +export const rootMiddleware = [modelManagementApi.middleware, configurationApi.middleware]; export default rootReducer; diff --git a/lib/user-interface/react/src/shared/reducers/model-management.reducer.ts b/lib/user-interface/react/src/shared/reducers/model-management.reducer.ts index 49a7a36c..994eb4c4 100644 --- a/lib/user-interface/react/src/shared/reducers/model-management.reducer.ts +++ b/lib/user-interface/react/src/shared/reducers/model-management.reducer.ts @@ -42,6 +42,13 @@ export const modelManagementApi = createApi({ method: 'POST', data: modelRequest }), + transformErrorResponse: (baseQueryReturnValue) => { + // transform into SerializedError + return { + name: 'Create Model Error', + message: baseQueryReturnValue.data?.type === 'RequestValidationError' ? baseQueryReturnValue.data.detail.map((error) => error.msg).join(', ') : baseQueryReturnValue.data.message + }; + }, invalidatesTags: ['models'], }), updateModel: builder.mutation({ @@ -50,10 +57,27 @@ export const modelManagementApi = createApi({ method: 'PUT', data: modelRequest }), + transformErrorResponse: (baseQueryReturnValue) => { + // transform into SerializedError + return { + name: 'Update Model Error', + message: baseQueryReturnValue.data?.type === 'RequestValidationError' ? 
baseQueryReturnValue.data.detail.map((error) => error.msg).join(', ') : baseQueryReturnValue.data.message + }; + }, invalidatesTags: ['models'], }), + getInstances: builder.query({ + query: () => ({ + url: '/models/metadata/instances' + }) + }) }), }); -export const { useGetAllModelsQuery, useDeleteModelMutation, useCreateModelMutation, useUpdateModelMutation } = - modelManagementApi; +export const { + useGetAllModelsQuery, + useDeleteModelMutation, + useCreateModelMutation, + useUpdateModelMutation, + useGetInstancesQuery +} = modelManagementApi; diff --git a/lib/user-interface/react/src/shared/reducers/user.reducer.ts b/lib/user-interface/react/src/shared/reducers/user.reducer.ts index 4c0f499e..1bc69193 100644 --- a/lib/user-interface/react/src/shared/reducers/user.reducer.ts +++ b/lib/user-interface/react/src/shared/reducers/user.reducer.ts @@ -33,6 +33,7 @@ export const User = createSlice({ }); export const selectCurrentUserIsAdmin = (state: any) => state.user.info?.isAdmin ?? false; +export const selectCurrentUsername = (state: any) => state.user.info?.preferred_username ?? ''; export const { updateUserState } = User.actions; diff --git a/lib/user-interface/react/src/shared/util/utils.ts b/lib/user-interface/react/src/shared/util/utils.ts new file mode 100644 index 00000000..f5dd7aa5 --- /dev/null +++ b/lib/user-interface/react/src/shared/util/utils.ts @@ -0,0 +1,51 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. + */ + +import _ from 'lodash'; + +/** + * Computes the difference between two JSON objects, recursively. + * + * This function takes two JSON objects as input and returns a new object that + * contains the differences between the two. Works with nested objects. + * + * @param {object} [obj1={}] - The first JSON object to compare. + * @param {object} [obj2={}] - The second JSON object to compare. + * @returns {object} - A new object containing the differences between the two input objects. + */ +export function getJsonDifference (obj1 = {}, obj2 = {}) { + const output = {}, + merged = { ...obj1, ...obj2 }; // has properties of both + + for (const key in merged) { + const value1 = obj1 && Object.keys(obj1).includes(key) ? obj1[key] : undefined; + const value2 = obj2 && Object.keys(obj2).includes(key) ? obj2[key] : undefined; + + if (_.isPlainObject(value1) || _.isPlainObject(value2)) { + const value = getJsonDifference(value1, value2); // recursively call + if (Object.keys(value).length !== 0) { + output[key] = value; + } + + } else { + if (!_.isEqual(value1, value2) && (value1 || value2)) { + output[key] = value2; + // output[key][value2] = value2. + } + } + } + return output; +} diff --git a/lib/zod2md.config.ts b/lib/zod2md.config.ts new file mode 100644 index 00000000..84e70bbd --- /dev/null +++ b/lib/zod2md.config.ts @@ -0,0 +1,24 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"). + You may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +import type { Config } from 'zod2md'; + +export default { + title: 'LISA Configuration Schema', + entry: './lib/schema.ts', + output: './lib/docs/config/schema.md', + tsconfig: 'tsconfig.json', +} satisfies Config; diff --git a/lisa-sdk/pyproject.toml b/lisa-sdk/pyproject.toml index 4bd844ef..20dcfd30 100644 --- a/lisa-sdk/pyproject.toml +++ b/lisa-sdk/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lisapy" -version = "3.1.0" +version = "3.2.0" description = "A simple SDK to help you interact with LISA. LISA is an LLM hosting solution for AWS dedicated clouds or ADCs." authors = ["Steve Goley "] readme = "README.md" diff --git a/package-lock.json b/package-lock.json index 7801f45a..04c23166 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,12 +7,14 @@ "": { "name": "lisa", "version": "3.1.0", + "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { "aws-cdk-lib": "2.125.0", "cdk-nag": "^2.27.198", "constructs": "^10.0.0", "js-yaml": "^4.1.0", + "lodash": "^4.17.21", "source-map-support": "^0.5.21", "zod": "^3.22.3" }, @@ -26,6 +28,7 @@ "@stylistic/eslint-plugin": "^2.7.2", "@types/jest": "^29.5.12", "@types/js-yaml": "^4.0.5", + "@types/lodash": "^4.17.12", "@types/node": "20.5.3", "@typescript-eslint/eslint-plugin": "^6.7.0", "@typescript-eslint/parser": "^6.6.0", @@ -40,7 +43,8 @@ "lint-staged": "^15.2.10", "ts-jest": "^29.1.1", "ts-node": "^10.9.1", - "typescript": "~5.1.6" + "typescript": "~5.1.6", + "zod2md": "^0.1.4" } }, "node_modules/@ampproject/remapping": { @@ -1733,6 +1737,15 @@ "node": ">=4.0" } }, + "node_modules/@commander-js/extra-typings": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/@commander-js/extra-typings/-/extra-typings-12.1.0.tgz", + "integrity": "sha512-wf/lwQvWAA0goIghcb91dQYpkLBcyhOhQNqG/VgWhnKzgt+UOMvra7EX/2fv70arm5RW+PUHoQHHDa6/p77Eqg==", + "dev": true, + "peerDependencies": { 
+ "commander": "~12.1.0" + } + }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -1755,6 +1768,374 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz", + "integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz", + "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz", + "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz", + "integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.19.12", + "resolved": 
"https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz", + "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz", + "integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz", + "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz", + "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz", + "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.19.12", + "resolved": 
"https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz", + "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz", + "integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz", + "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz", + "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz", + "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.19.12", + 
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz", + "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz", + "integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz", + "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz", + "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz", + "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.19.12", + "resolved": 
"https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz", + "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz", + "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz", + "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz", + "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, "node_modules/@eslint-community/eslint-utils": { "version": "4.4.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz", @@ -3667,6 +4048,13 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/lodash": { + "version": "4.17.12", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.12.tgz", + "integrity": 
"sha512-sviUmCE8AYdaF/KIHLDJBQgeYzPBI0vf/17NaYehBJfYD1j6/L95Slh07NlyK2iNyBNaEkb3En2jRt+a8y3xZQ==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/minimatch": { "version": "3.0.5", "resolved": "https://registry.npmjs.org/@types/minimatch/-/minimatch-3.0.5.tgz", @@ -4885,6 +5273,21 @@ "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==" }, + "node_modules/bundle-require": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/bundle-require/-/bundle-require-4.2.1.tgz", + "integrity": "sha512-7Q/6vkyYAwOmQNRw75x+4yRtZCZJXUDmHHlFdkiV0wgv/reNjtJwpu1jPJ0w2kbEpIM0uoKI3S4/f39dU7AjSA==", + "dev": true, + "dependencies": { + "load-tsconfig": "^0.2.3" + }, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "peerDependencies": { + "esbuild": ">=0.17" + } + }, "node_modules/call-bind": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", @@ -5803,6 +6206,44 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/esbuild": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz", + "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.19.12", + "@esbuild/android-arm": "0.19.12", + "@esbuild/android-arm64": "0.19.12", + "@esbuild/android-x64": "0.19.12", + "@esbuild/darwin-arm64": "0.19.12", + "@esbuild/darwin-x64": "0.19.12", + "@esbuild/freebsd-arm64": "0.19.12", + "@esbuild/freebsd-x64": "0.19.12", + "@esbuild/linux-arm": "0.19.12", + "@esbuild/linux-arm64": "0.19.12", + "@esbuild/linux-ia32": "0.19.12", + "@esbuild/linux-loong64": "0.19.12", + "@esbuild/linux-mips64el": 
"0.19.12", + "@esbuild/linux-ppc64": "0.19.12", + "@esbuild/linux-riscv64": "0.19.12", + "@esbuild/linux-s390x": "0.19.12", + "@esbuild/linux-x64": "0.19.12", + "@esbuild/netbsd-x64": "0.19.12", + "@esbuild/openbsd-x64": "0.19.12", + "@esbuild/sunos-x64": "0.19.12", + "@esbuild/win32-arm64": "0.19.12", + "@esbuild/win32-ia32": "0.19.12", + "@esbuild/win32-x64": "0.19.12" + } + }, "node_modules/escalade": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", @@ -9526,6 +9967,15 @@ "node": ">=18.0.0" } }, + "node_modules/load-tsconfig": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/load-tsconfig/-/load-tsconfig-0.2.5.tgz", + "integrity": "sha512-IXO6OCs9yg8tMKzfPZ1YmheJbZCiEsnBdcB03l0OcfK9prKnJb96siuHCr5Fl37/yo9DnKU+TLpxzTUspw9shg==", + "dev": true, + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -9544,8 +9994,7 @@ "node_modules/lodash": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "dev": true + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, "node_modules/lodash.memoize": { "version": "4.1.2", @@ -11700,6 +12149,24 @@ "funding": { "url": "https://github.com/sponsors/colinhacks" } + }, + "node_modules/zod2md": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/zod2md/-/zod2md-0.1.4.tgz", + "integrity": "sha512-ZEW9TZd4M9PHB/UeZcLXIjlCbzPUESGvzEN+Ttye18quh4Afap8DYd/zpIPfw+DrVsSSWoNU40HVnfE9UcpmPw==", + "dev": true, + "dependencies": { + "@commander-js/extra-typings": "^12.0.0", + "bundle-require": "^4.0.2", + "commander": "^12.0.0", + "esbuild": "^0.19.11" + }, + "bin": { + "zod2md": "dist/bin.js" + }, + 
"peerDependencies": { + "zod": "^3.22.0" + } } } } diff --git a/package.json b/package.json index 2bad7510..7166d5e5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "lisa", - "version": "3.1.0", + "version": "3.2.0", "bin": { "lisa": "bin/lisa.js" }, @@ -9,7 +9,11 @@ "watch": "tsc -w", "test": "jest", "cdk": "cdk", - "prepare": "husky install" + "prepare": "husky install", + "migrate-properties": "node ./scripts/migrate-properties.mjs", + "postinstall": "(cd lib/user-interface/react && npm install) && (cd lib/docs && npm install)", + "postbuild": "(cd lib/user-interface/react && npm build) && (cd lib/docs && npm build)", + "generateSchemaDocs": "npx zod2md -c ./lib/zod2md.config.ts" }, "devDependencies": { "@aws-cdk/aws-lambda-python-alpha": "2.125.0-alpha.0", @@ -18,6 +22,7 @@ "@stylistic/eslint-plugin": "^2.7.2", "@types/jest": "^29.5.12", "@types/js-yaml": "^4.0.5", + "@types/lodash": "^4.17.12", "@types/node": "20.5.3", "@typescript-eslint/eslint-plugin": "^6.7.0", "@typescript-eslint/parser": "^6.6.0", @@ -32,13 +37,15 @@ "lint-staged": "^15.2.10", "ts-jest": "^29.1.1", "ts-node": "^10.9.1", - "typescript": "~5.1.6" + "typescript": "~5.1.6", + "zod2md": "^0.1.4" }, "dependencies": { "aws-cdk-lib": "2.125.0", "cdk-nag": "^2.27.198", "constructs": "^10.0.0", "js-yaml": "^4.1.0", + "lodash": "^4.17.21", "source-map-support": "^0.5.21", "zod": "^3.22.3" }, diff --git a/scripts/migrate-properties.mjs b/scripts/migrate-properties.mjs new file mode 100644 index 00000000..0d6d1c73 --- /dev/null +++ b/scripts/migrate-properties.mjs @@ -0,0 +1,60 @@ +/** + Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the 'License'). + You may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an 'AS IS' BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +import * as yaml from 'js-yaml'; +import fs from 'fs'; +import path from 'path'; +import _ from 'lodash'; + +console.log('MIGRATING PROPERTIES...'); + +const configFilePath = path.join('./config.yaml'); +const configFile = yaml.load(fs.readFileSync(configFilePath, 'utf8')); + +console.log('FOUND CONFIG FILE: config.yaml\n') + +for (const key in configFile){ + if(_.isPlainObject(configFile[key])) { + const oldConfig = configFile[key] + let newConfig = {...configFile[key]}; + + delete newConfig.lambdaConfig; + delete newConfig.litellmConfig; + delete newConfig.restApiConfig; + + newConfig['restApiConfig'] = { + 'sslCertIamArn': oldConfig['restApiConfig']['loadBalancerConfig']['sslCertIamArn'], + 'internetFacing': oldConfig['restApiConfig']['internetFacing'], + 'domainName': oldConfig['restApiConfig']['loadBalancerConfig']['domainName'], + 'rdsConfig': oldConfig['restApiConfig']['rdsConfig'], + } + + newConfig['litellmConfig'] = { + 'dbKey': oldConfig['litellmConfig']['general_settings']['master_key'] + } + + if (JSON.stringify(newConfig.restApiConfig) === '{}'){ + delete newConfig.restApiConfig; + } + + if (JSON.stringify(newConfig.litellmConfig) === '{}'){ + delete newConfig.litellmConfig; + } + + console.log('NEW CONFIG FILE = \n' + yaml.dump(_(newConfig).omit(_.isNil).value())); + fs.writeFileSync('./config-custom.yaml', yaml.dump(_(newConfig).omit(_.isNil).value())); + } +} diff --git a/test/cdk/mocks/config.yaml b/test/cdk/mocks/config.yaml index b364623e..e0991c48 100644 --- a/test/cdk/mocks/config.yaml +++ b/test/cdk/mocks/config.yaml @@ -21,10 +21,6 @@ 
dev: # rolePrefix: CustomPrefix # policyPrefix: CustomPrefix # instanceProfilePrefix: CustomPrefix - # systemBanner: - # text: 'LISA System' - # backgroundColor: orange - # fontColor: black s3BucketModels: hf-models-gaiic # aws partition mountS3 package location mountS3DebUrl: https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb @@ -35,57 +31,12 @@ dev: accountNumbersEcr: - '012345678901' deployRag: true - lambdaConfig: - pythonRuntime: PYTHON_3_10 - logLevel: DEBUG - vpcAutoscalingConfig: - provisionedConcurrentExecutions: 5 - minCapacity: 1 - maxCapacity: 50 - targetValue: 0.80 - cooldown: 30 authConfig: authority: test clientId: test logLevel: DEBUG - # NOTE: The following configuration will allow for using a custom domain for the chat user interface. - # If this option is specified, the API Gateway invocation URL will NOT work on its own as the application URL. - # Users must use the custom domain for the user interface to work if this option is populated. - apiGatewayConfig: - domainName: restApiConfig: - apiVersion: v2 - instanceType: m5.large - containerConfig: - image: - baseImage: python:3.9 - path: lib/serve/rest-api - type: asset - healthCheckConfig: - command: ["CMD-SHELL", "exit 0"] - interval: 10 - startPeriod: 30 - timeout: 5 - retries: 3 - autoScalingConfig: - minCapacity: 1 - maxCapacity: 1 - cooldown: 60 - defaultInstanceWarmup: 60 - metricConfig: - AlbMetricName: RequestCountPerTarget - targetValue: 1000 - duration: 60 - estimatedInstanceWarmup: 30 - loadBalancerConfig: - sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev - healthCheckConfig: - path: /health - interval: 60 - timeout: 30 - healthyThresholdCount: 2 - unhealthyThresholdCount: 10 - domainName: + sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev ragRepositories: - repositoryId: pgvector-rag type: pgvector @@ -125,8 +76,4 @@ dev: # inferenceContainer: tgi # baseImage: 
ghcr.io/huggingface/text-generation-inference:2.0.1 litellmConfig: - general_settings: - master_key: sk-012345 - litellm_settings: - telemetry: false - model_list: + db_key: sk-012345 #pragma: allowlist secret diff --git a/test/cdk/stacks/chat.test.ts b/test/cdk/stacks/chat.test.ts index 526e32bf..304adcf0 100644 --- a/test/cdk/stacks/chat.test.ts +++ b/test/cdk/stacks/chat.test.ts @@ -79,7 +79,7 @@ describe.each(regions)('Chat Nag Pack Tests | Region Test: %s', (awsRegion) => { ...baseStackProps, stackName: createCdkId([config.deploymentName, config.appName, 'API']), description: `LISA-API: ${config.deploymentName}-${config.deploymentStage}`, - vpc: networkingStack.vpc.vpc, + vpc: networkingStack.vpc, }); stack = new LisaChatApplicationStack(app, 'LisaChat', { @@ -89,7 +89,7 @@ describe.each(regions)('Chat Nag Pack Tests | Region Test: %s', (awsRegion) => { description: `LISA-chat: ${config.deploymentName}-${config.deploymentStage}`, restApiId: apiBaseStack.restApiId, rootResourceId: apiBaseStack.rootResourceId, - vpc: networkingStack.vpc.vpc, + vpc: networkingStack.vpc, }); // WHEN @@ -105,12 +105,12 @@ describe.each(regions)('Chat Nag Pack Tests | Region Test: %s', (awsRegion) => { //TODO Update expect values to remediate CDK NAG findings and remove debug test('AwsSolutions CDK NAG Warnings', () => { const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); - expect(warnings.length).toBe(1); + expect(warnings.length).toBe(2); }); test('AwsSolutions CDK NAG Errors', () => { const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); - expect(errors.length).toBe(17); + expect(errors.length).toBe(28); }); test('NIST800.53r5 CDK NAG Warnings', () => { @@ -120,6 +120,6 @@ describe.each(regions)('Chat Nag Pack Tests | Region Test: %s', (awsRegion) => { test('NIST800.53r5 CDK NAG Errors', () => { const errors = Annotations.fromStack(stack).findError('*', 
Match.stringLikeRegexp('NIST.*'));
-        expect(errors.length).toBe(14);
+        expect(errors.length).toBe(11);
     });
 });
diff --git a/test/cdk/stacks/core-api-base.test.ts b/test/cdk/stacks/core-api-base.test.ts
index e14dddde..c3e543a6 100644
--- a/test/cdk/stacks/core-api-base.test.ts
+++ b/test/cdk/stacks/core-api-base.test.ts
@@ -78,7 +78,7 @@ describe.each(regions)('API Core Nag Pack Tests | Region Test: %s', (awsRegion)
             ...baseStackProps,
             stackName: createCdkId([config.deploymentName, config.appName, 'API']),
             description: `LISA-API: ${config.deploymentName}-${config.deploymentStage}`,
-            vpc: networkingStack.vpc.vpc,
+            vpc: networkingStack.vpc,
         });
 
         tempStack.authorizer._attachToApi(tempStack.restApi);
@@ -112,6 +112,6 @@ describe.each(regions)('API Core Nag Pack Tests | Region Test: %s', (awsRegion)
 
     test('NIST800.53r5 CDK NAG Errors', () => {
         const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('NIST.*'));
-        expect(errors.length).toBe(7);
+        expect(errors.length).toBe(5);
     });
 });
diff --git a/test/cdk/stacks/core-api-deploy.test.ts b/test/cdk/stacks/core-api-deploy.test.ts
index dc78eb30..7b0fe20e 100644
--- a/test/cdk/stacks/core-api-deploy.test.ts
+++ b/test/cdk/stacks/core-api-deploy.test.ts
@@ -79,7 +79,7 @@ describe.each(regions)('API Core Deployment Nag Pack Tests | Region Test: %s', (
             ...baseStackProps,
             stackName: createCdkId([config.deploymentName, config.appName, 'API']),
             description: `LISA-API: ${config.deploymentName}-${config.deploymentStage}`,
-            vpc: networkingStack.vpc.vpc,
+            vpc: networkingStack.vpc,
         });
 
         tempStack.authorizer._attachToApi(tempStack.restApi);
diff --git a/test/cdk/stacks/docs.test.ts b/test/cdk/stacks/docs.test.ts
new file mode 100644
index 00000000..e5e2a924
--- /dev/null
+++ b/test/cdk/stacks/docs.test.ts
@@ -0,0 +1,113 @@
+/**
+  Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+  Licensed under the Apache License, Version 2.0 (the "License").
+  You may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+import { App, Aspects, Stack, StackProps } from 'aws-cdk-lib';
+import { Annotations, Match } from 'aws-cdk-lib/assertions';
+import { AwsSolutionsChecks, NIST80053R5Checks } from 'cdk-nag';
+import * as yaml from 'js-yaml';
+
+import { LisaDocsStack } from '../../../lib/docs/index';
+import { BaseProps, Config, ConfigFile, ConfigSchema } from '../../../lib/schema';
+
+// Regions the suite below is repeated for.
+const regions = ['us-east-1', 'us-gov-west-1', 'us-gov-east-1', 'us-isob-east-1', 'us-iso-east-1', 'us-iso-west-1'];
+
+/**
+ * Builds LisaDocsStack for each region with the AwsSolutions and NIST 800-53 r5
+ * cdk-nag packs attached, and pins the number of findings each pack reports.
+ */
+describe.each(regions)('Docs Nag Pack Tests | Region Test: %s', (awsRegion) => {
+    let app: App;
+    let stack: Stack;
+    let config: Config;
+    let baseStackProps: BaseProps & StackProps;
+
+    // Load and validate the mock configuration once per region.
+    beforeAll(() => {
+        app = new App();
+
+        // Read configuration file
+        const configFilePath = path.join(__dirname, '../../../test/cdk/mocks/config.yaml');
+        const configFile = yaml.load(fs.readFileSync(configFilePath, 'utf8')) as ConfigFile;
+        const configEnv = configFile.env || 'dev';
+        const configData = configFile[configEnv];
+        if (!configData) {
+            throw new Error(`Configuration for environment "${configEnv}" not found.`);
+        }
+        // Validate and parse configuration
+        try {
+            config = ConfigSchema.parse(configData);
+        } catch (error) {
+            if (error instanceof Error) {
+                console.error('Error parsing the configuration:', error.message);
+            } else {
+                console.error('An unexpected error occurred:', error);
+            }
+            // Rethrow instead of calling process.exit(1) so Jest reports the
+            // failure for this suite rather than ending the test process.
+            throw error;
+        }
+
+        baseStackProps = {
+            env: {
+                account: '012345678901',
+                region: awsRegion,
+            },
+            config,
+        };
+    });
+
+    // Build a fresh docs stack for every test and attach both nag packs to it.
+    beforeEach(() => {
+        stack = new LisaDocsStack(app, 'LisaDocs', {
+            ...baseStackProps,
+        });
+
+        // WHEN
+        Aspects.of(stack).add(new AwsSolutionsChecks({ verbose: true }));
+        Aspects.of(stack).add(new NIST80053R5Checks({ verbose: true }));
+    });
+
+    // Replace the App and Stack references so the next beforeEach starts from a new App.
+    afterEach(() => {
+        app = new App();
+        stack = new Stack();
+    });
+
+    //TODO Update expect values to remediate CDK NAG findings and remove debug
+    test('AwsSolutions CDK NAG Warnings', () => {
+        const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*'));
+        expect(warnings.length).toBe(1);
+    });
+
+    test('AwsSolutions CDK NAG Errors', () => {
+        const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*'));
+        expect(errors.length).toBe(23);
+    });
+
+    test('NIST800.53r5 CDK NAG Warnings', () => {
+        const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('NIST.*'));
+        expect(warnings.length).toBe(0);
+    });
+
+    test('NIST800.53r5 CDK NAG Errors', () => {
+        const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('NIST.*'));
+        expect(errors.length).toBe(13);
+    });
+});
diff --git a/test/cdk/stacks/ui.test.ts b/test/cdk/stacks/ui.test.ts
index 7e95fba9..63066fa4 100644
--- a/test/cdk/stacks/ui.test.ts
+++ b/test/cdk/stacks/ui.test.ts
@@ -80,7 +80,7 @@ describe.each(regions)('UI Nag Pack Tests | Region Test: %s', (awsRegion) => {
             ...baseStackProps,
             stackName: createCdkId([config.deploymentName, config.appName, 'API']),
             description: `LISA-API: ${config.deploymentName}-${config.deploymentStage}`,
-            vpc: networkingStack.vpc.vpc,
+            vpc: networkingStack.vpc,
         });
 
         stack = new UserInterfaceStack(app, 'LisaUserInterface', {
diff --git a/tsconfig.json b/tsconfig.json
index 5473529e..747a2f76 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -19,7 +19,8 @@
     "inlineSources": true,
     "experimentalDecorators": true,
"strictPropertyInitialization": false, - "typeRoots": ["./node_modules/@types"] + "typeRoots": ["./node_modules/@types"], + "outDir": "dist/" }, - "exclude": ["node_modules", "cdk.out", ".git"] + "exclude": ["node_modules", "cdk.out", ".git", "dist"] }