diff --git a/.github/actions/aml_real_deployment/action.yml b/.github/actions/aml_real_deployment/action.yml index 3da2fcdef..f5bb4e2fb 100644 --- a/.github/actions/aml_real_deployment/action.yml +++ b/.github/actions/aml_real_deployment/action.yml @@ -24,24 +24,10 @@ inputs: KEY_VAULT_NAME: description: "Key vault name" required: true - ENV_VARS: - description: 'The API key for authentication' - required: false runs: using: composite - steps: - - name: Create .env file - shell: bash - run: | - echo "${{ inputs.ENV_VARS }}" >> .env - for var in $(cat .env); do - echo "$var" >> $GITHUB_ENV - done - - name: load .env file - shell: bash - run: python -c "from dotenv import load_dotenv; load_dotenv()" - + steps: - name: Provision Managed Endpoint uses: ./.github/actions/execute_script with: diff --git a/.github/actions/kubernetes_deployment/action.yml b/.github/actions/kubernetes_deployment/action.yml index 9fa6aeea2..07530dd0a 100644 --- a/.github/actions/kubernetes_deployment/action.yml +++ b/.github/actions/kubernetes_deployment/action.yml @@ -29,16 +29,6 @@ inputs: runs: using: composite steps: - - name: Create .env file - shell: bash - run: | - echo "${{ secrets.env_vars }}" >> .env - for var in $(cat .env); do - echo "$var" >> $GITHUB_ENV - done - - name: load .env file - shell: bash - run: python -c "from dotenv import load_dotenv; load_dotenv()" - name: Provision Kubernetes Online Endpoint uses: ./.github/actions/execute_script with: diff --git a/.github/actions/webapp_deployment/action.yml b/.github/actions/webapp_deployment/action.yml index 291cb717f..b9b867ad7 100644 --- a/.github/actions/webapp_deployment/action.yml +++ b/.github/actions/webapp_deployment/action.yml @@ -9,20 +9,10 @@ inputs: DEPLOY_ENVIRONMENT: description: "env stage e.g. dev, test, prod" required: true - ENV_VARS: - description: "env stage e.g. dev, test, prod" - required: true runs: using: composite steps: - - name: Convert .env to environment variables - shell: bash - run: | - while IFS='=' read -r key value; do - echo "$key=$value" >> $GITHUB_ENV - done <<< "${{ inputs.env_vars }}" - - name: create docker image shell: bash run: use_case_base_path=${{ inputs.USE_CASE_BASE_PATH }} deploy_environment=${{ inputs.DEPLOY_ENVIRONMENT }} build_id=${{ github.run_id }} ./llmops/common/scripts/gen_docker_image.sh diff --git a/.github/workflows/platform_cd_dev_workflow.yml b/.github/workflows/platform_cd_dev_workflow.yml index ca1520e6c..78896bcfd 100644 --- a/.github/workflows/platform_cd_dev_workflow.yml +++ b/.github/workflows/platform_cd_dev_workflow.yml @@ -58,20 +58,25 @@ jobs: uses: azure/login@v1 with: creds: ${{ secrets.azure_credentials }} + - name: Configure Azure ML Agent uses: ./.github/actions/configure_azureml_agent with: versionSpec: "3.9" base_path: ${{ inputs.use_case_base_path }} - - name: Create .env file + - name: Load secrets from ENV_VARS run: | - echo "${{ secrets.ENV_VARS }}" >> .env - for var in $(cat .env); do - echo "$var" >> $GITHUB_ENV - done - - name: load .env file - run: python -c "from dotenv import load_dotenv; load_dotenv()" + set -e # fail on error + IFS=$'\n' # Set internal field separator to new line + for var in $(echo "${{ secrets.ENV_VARS }}"); do + KEY=$(echo "$var" | cut -d '=' -f 1) + VALUE=$(echo "$var" | cut -d '=' -f 2-) + echo "::add-mask::$VALUE" + echo "$KEY=$VALUE" >> $GITHUB_ENV + done + shell: bash + - name: load the current Azure subscription details id: subscription_details shell: bash @@ -123,7 +128,6 @@ jobs: WORKSPACE_NAME: ${{ vars.WORKSPACE_NAME }} KEY_VAULT_NAME: ${{ vars.KEY_VAULT_NAME }} MODEL_VERSION: $MODEL_VERSION - ENV_VARS: "${{ secrets.ENV_VARS }}" #===================================== # Executes Managed kubernetes deployment when parameter deployment_type == 'aks' @@ -155,12 +159,6 @@ jobs: --env_name ${{ inputs.env_name }} \ --base_path ${{ inputs.use_case_base_path }} \ - - name: Read .env file - id: env_vars - run: | - LOCAL_VARS=$(cat .env) - echo "local_vars=$LOCAL_VARS" >> $GITHUB_OUTPUT - #===================================== # Executes Azure Webapp deployment when parameter # deployment_type == 'webapp' using docker image @@ -172,7 +170,6 @@ jobs: with: USE_CASE_BASE_PATH: ${{ inputs.use_case_base_path }} DEPLOY_ENVIRONMENT: ${{ inputs.env_name }} - ENV_VARS: ${{ steps.env_vars.outputs.local_vars }} env: REGISTRY_DETAILS: ${{ secrets.registry_details }} diff --git a/.github/workflows/platform_ci_dev_workflow.yml b/.github/workflows/platform_ci_dev_workflow.yml index 3f87e398b..b1ba78173 100644 --- a/.github/workflows/platform_ci_dev_workflow.yml +++ b/.github/workflows/platform_ci_dev_workflow.yml @@ -52,14 +52,18 @@ jobs: versionSpec: "3.9" base_path: ${{ inputs.use_case_base_path }} - - name: Create .env file + - name: Load secrets from ENV_VARS run: | - echo "${{ secrets.ENV_VARS }}" >> .env - for var in $(cat .env); do - echo "$var" >> $GITHUB_ENV + set -e # fail on error + IFS=$'\n' # Set internal field separator to new line + for var in $(echo "${{ secrets.ENV_VARS }}"); do + KEY=$(echo "$var" | cut -d '=' -f 1) + VALUE=$(echo "$var" | cut -d '=' -f 2-) + echo "::add-mask::$VALUE" + echo "$KEY=$VALUE" >> $GITHUB_ENV done - - name: load .env file - run: python -c "from dotenv import load_dotenv; load_dotenv()" + shell: bash + - name: load the current Azure subscription details id: subscription_details shell: bash diff --git a/.github/workflows/platform_pr_dev_workflow.yml b/.github/workflows/platform_pr_dev_workflow.yml index c0b5f5b1e..d95115da0 100644 --- a/.github/workflows/platform_pr_dev_workflow.yml +++ b/.github/workflows/platform_pr_dev_workflow.yml @@ -15,7 +15,7 @@ on: azure_credentials: description: "service principal auth to Azure" required: true - env_vars: + ENV_VARS: description: "env vars for the flow" required: false @@ -43,19 +43,24 @@ jobs: uses: azure/login@v1 with: creds: ${{ secrets.azure_credentials }} + - name: Configure Azure ML Agent uses: ./.github/actions/configure_azureml_agent with: versionSpec: "3.9" base_path: ${{ inputs.use_case_base_path }} - - name: Create .env file + + - name: Load secrets from ENV_VARS run: | - echo "${{ secrets.env_vars }}" >> .env - for var in $(cat .env); do - echo "$var" >> $GITHUB_ENV + set -e # fail on error + IFS=$'\n' # Set internal field separator to new line + for var in $(echo "${{ secrets.ENV_VARS }}"); do + KEY=$(echo "$var" | cut -d '=' -f 1) + VALUE=$(echo "$var" | cut -d '=' -f 2-) + echo "::add-mask::$VALUE" + echo "$KEY=$VALUE" >> $GITHUB_ENV done - - name: load .env file - run: python -c "from dotenv import load_dotenv; load_dotenv()" + shell: bash - name: load the current Azure subscription details id: subscription_details diff --git a/chat_with_pdf/data/bert-paper-qna-1-line.jsonl b/chat_with_pdf/data/bert-paper-qna-1-line.jsonl index 0993b5867..db56d0143 100644 --- a/chat_with_pdf/data/bert-paper-qna-1-line.jsonl +++ b/chat_with_pdf/data/bert-paper-qna-1-line.jsonl @@ -1 +1 @@ -{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf","chat_history":[],"question":"What is the name of the new language representation model introduced in the document?","answer":"BERT","context":"We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers.","config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"aoai","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} +{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf","chat_history":[],"question":"What is the name of the new language representation model introduced in the document?","answer":"BERT","context":"We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers.","config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"gpt-35-turbo","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} diff --git a/chat_with_pdf/data/bert-paper-qna-3-line.jsonl b/chat_with_pdf/data/bert-paper-qna-3-line.jsonl index 8f7f63e3f..522363968 100644 --- a/chat_with_pdf/data/bert-paper-qna-3-line.jsonl +++ b/chat_with_pdf/data/bert-paper-qna-3-line.jsonl @@ -1,3 +1,3 @@ -{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf", "chat_history":[], "question": "What is the main difference between BERT and previous language representation models?", "groundtruth": "BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers.", "context": "Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers.", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"aoai","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} -{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf", "chat_history":[], "question": "What is the size of the vocabulary used by BERT?", "groundtruth": "30,000", "context": "We use WordPiece embeddings (Wu et al., 2016) with a 30,000 token vocabulary.", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"aoai","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} -{"pdf_url":"https://grs.pku.edu.cn/docs/2018-03/20180301083100898652.pdf", "chat_history":[], "question": "论文写作中论文引言有什么注意事项?", "groundtruth":"", "context":"", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"aoai","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} \ No newline at end of file +{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf", "chat_history":[], "question": "What is the main difference between BERT and previous language representation models?", "groundtruth": "BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers.", "context": "Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pretrain deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers.", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"gpt-35-turbo","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} +{"pdf_url":"https://arxiv.org/pdf/1810.04805.pdf", "chat_history":[], "question": "What is the size of the vocabulary used by BERT?", "groundtruth": "30,000", "context": "We use WordPiece embeddings (Wu et al., 2016) with a 30,000 token vocabulary.", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"gpt-35-turbo","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} +{"pdf_url":"https://grs.pku.edu.cn/docs/2018-03/20180301083100898652.pdf", "chat_history":[], "question": "论文写作中论文引言有什么注意事项?", "groundtruth":"", "context":"", "config":{"EMBEDDING_MODEL_DEPLOYMENT_NAME":"text-embedding-ada-002","CHAT_MODEL_DEPLOYMENT_NAME":"gpt-35-turbo","PROMPT_TOKEN_LIMIT":2000,"MAX_COMPLETION_TOKENS":256,"CHUNK_SIZE":1024,"CHUNK_OVERLAP":64}} \ No newline at end of file diff --git a/chat_with_pdf/flows/evaluation/flow.dag.yaml b/chat_with_pdf/flows/evaluation/flow.dag.yaml index 703ce0bbf..e7d2d1d48 100644 --- a/chat_with_pdf/flows/evaluation/flow.dag.yaml +++ b/chat_with_pdf/flows/evaluation/flow.dag.yaml @@ -422,7 +422,7 @@ nodes: type: code path: rag_groundedness_prompt.jinja2 inputs: - deployment_name: aoai + deployment_name: gpt-35-turbo temperature: 0 top_p: 1 max_tokens: 1000 @@ -443,7 +443,7 @@ nodes: type: code path: rag_retrieval_prompt.jinja2 inputs: - deployment_name: aoai + deployment_name: gpt-35-turbo temperature: 0 top_p: 1 max_tokens: 1000 @@ -463,7 +463,7 @@ nodes: type: code path: rag_generation_prompt.jinja2 inputs: - deployment_name: aoai + deployment_name: gpt-35-turbo temperature: 0 top_p: 1 max_tokens: 1000 diff --git a/chat_with_pdf/flows/standard/flow.dag.yaml b/chat_with_pdf/flows/standard/flow.dag.yaml index db6d36158..30f609811 100644 --- a/chat_with_pdf/flows/standard/flow.dag.yaml +++ b/chat_with_pdf/flows/standard/flow.dag.yaml @@ -16,7 +16,7 @@ inputs: type: object default: EMBEDDING_MODEL_DEPLOYMENT_NAME: text-embedding-ada-002 - CHAT_MODEL_DEPLOYMENT_NAME: aoai + CHAT_MODEL_DEPLOYMENT_NAME: gpt-35-turbo PROMPT_TOKEN_LIMIT: 3000 MAX_COMPLETION_TOKENS: 1024 VERBOSE: true