From ba95c9221ae63be7ae1d7c83103f296a7843c73b Mon Sep 17 00:00:00 2001 From: sinedied Date: Mon, 8 Apr 2024 18:08:14 +0200 Subject: [PATCH 1/4] docs: add comments --- packages/api/src/functions/chat-post.ts | 5 +++++ packages/api/src/functions/documents-get.ts | 1 + packages/api/src/functions/documents-post.ts | 4 ++++ scripts/upload-documents.js | 7 +++++++ 4 files changed, 17 insertions(+) diff --git a/packages/api/src/functions/chat-post.ts b/packages/api/src/functions/chat-post.ts index 59c8adc..ee8b776 100644 --- a/packages/api/src/functions/chat-post.ts +++ b/packages/api/src/functions/chat-post.ts @@ -55,6 +55,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) let store: VectorStore; if (azureOpenAiEndpoint) { + // Initialize models and vector database embeddings = new AzureOpenAIEmbeddings(); model = new AzureChatOpenAI(); store = new AzureCosmosDBVectorStore(embeddings, {}); @@ -66,6 +67,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) store = await FaissStore.load(faissStoreFolder, embeddings); } + // Create the chain that combines the prompt with the documents const combineDocsChain = await createStuffDocumentsChain({ llm: model, prompt: ChatPromptTemplate.fromMessages([ @@ -74,6 +76,8 @@ export async function postChat(request: HttpRequest, context: InvocationContext) ]), documentPrompt: PromptTemplate.fromTemplate('{filename}: {page_content}\n'), }); + + // Create the chain to retrieve the documents from the database const chain = await createRetrievalChain({ retriever: store.asRetriever(), combineDocsChain, @@ -96,6 +100,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) } } +// Transform the response chunks into a JSON stream function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) { const buffer = new Readable({ read() {}, diff --git a/packages/api/src/functions/documents-get.ts b/packages/api/src/functions/documents-get.ts index 4f4fded..0683f4f 100644 --- a/packages/api/src/functions/documents-get.ts +++ b/packages/api/src/functions/documents-get.ts @@ -16,6 +16,7 @@ async function getDocument(request: HttpRequest, context: InvocationContext): Pr let fileData: Uint8Array; if (connectionString && containerName) { + // Retrieve the file from Azure Blob Storage context.log(`Reading blob from: "${containerName}/${fileName}"`); const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString); const containerClient = blobServiceClient.getContainerClient(containerName); diff --git a/packages/api/src/functions/documents-post.ts b/packages/api/src/functions/documents-post.ts index be8b851..e7787c9 100644 --- a/packages/api/src/functions/documents-post.ts +++ b/packages/api/src/functions/documents-post.ts @@ -25,18 +25,21 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon const file = parsedForm.get('file') as File; const filename = file.name; + // Extract text from the PDF const loader = new PDFLoader(file, { splitPages: false, }); const rawDocument = await loader.load(); rawDocument[0].metadata.filename = filename; + // Split the text into smaller chunks const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1500, chunkOverlap: 100, }); const documents = await splitter.splitDocuments(rawDocument); + // Generate embeddings and save in database if (azureOpenAiEndpoint) { const store = await AzureCosmosDBVectorStore.fromDocuments(documents, new AzureOpenAIEmbeddings(), {}); await store.createIndex(); @@ -50,6 +53,7 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon } if (connectionString && containerName) { + // Upload the PDF file to Azure Blob Storage context.log(`Uploading file to blob storage: "${containerName}/${filename}"`); const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString); const containerClient = blobServiceClient.getContainerClient(containerName); diff --git a/scripts/upload-documents.js b/scripts/upload-documents.js index 0930a78..4beee66 100644 --- a/scripts/upload-documents.js +++ b/scripts/upload-documents.js @@ -1,6 +1,13 @@ import fs from 'node:fs/promises'; import path from 'node:path'; +// This script uploads all PDF files from the 'data' folder to the ingestion API. +// It does a cross-platform equivalent of this bash script: +// ``` +// for file in data/*.pdf; do +// curl -X POST -F "file=@$file" /api/documents +// done +// ``` async function uploadDocuments(apiUrl, dataFolder) { try { const files = await fs.readdir(dataFolder); From e4f81ef6828729d664b1ac43131c7200583ec530 Mon Sep 17 00:00:00 2001 From: sinedied Date: Mon, 8 Apr 2024 18:49:52 +0200 Subject: [PATCH 2/4] docs: add mistral badge --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ed197e0..66b93bc 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ [![Open project in GitHub Codespaces](https://img.shields.io/badge/Codespaces-Open-blue?style=flat-square&logo=github)](https://codespaces.new/Azure-Samples/serverless-chat-langchainjs?hide_repo_select=true&ref=main) ![Node version](https://img.shields.io/badge/Node.js->=20-grass?style=flat-square) +[![Ollama + Mistral](https://img.shields.io/badge/Ollama-Mistral-ff7000?style=flat-square)](https://ollama.com/library/mistral) [![TypeScript](https://img.shields.io/badge/TypeScript-blue?style=flat-square&logo=typescript&logoColor=white)](https://www.typescriptlang.org) -[![License](https://img.shields.io/badge/License-MIT-orange?style=flat-square)](LICENSE) +[![License](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](LICENSE) From fe72682475d5dd2288cac57bf138ce2c06006007 Mon Sep 17 00:00:00 2001 From: sinedied Date: Mon, 8 Apr 2024 18:57:37 +0200 Subject: [PATCH 3/4] docs: add build badge --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 66b93bc..d32e381 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,12 @@ # Serverless ChatGPT with RAG using LangChain.js [![Open project in GitHub Codespaces](https://img.shields.io/badge/Codespaces-Open-blue?style=flat-square&logo=github)](https://codespaces.new/Azure-Samples/serverless-chat-langchainjs?hide_repo_select=true&ref=main) -![Node version](https://img.shields.io/badge/Node.js->=20-grass?style=flat-square) +[![Build Status](https://img.shields.io/github/actions/workflow/status/Azure-Samples/serverless-chat-langchainjs/build-test.yaml?style=flat-square&label=Build)](https://github.com/Azure-Samples/serverless-chat-langchainjs/actions) +![Node version](https://img.shields.io/badge/Node.js->=20-3c873a?style=flat-square) [![Ollama + Mistral](https://img.shields.io/badge/Ollama-Mistral-ff7000?style=flat-square)](https://ollama.com/library/mistral) [![TypeScript](https://img.shields.io/badge/TypeScript-blue?style=flat-square&logo=typescript&logoColor=white)](https://www.typescriptlang.org) [![License](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](LICENSE) - :star: If you like this sample, star it on GitHub — it helps a lot! From 8f36c72fcc9e38255dc00e9c5df2e04abb27da23 Mon Sep 17 00:00:00 2001 From: sinedied Date: Mon, 8 Apr 2024 19:04:56 +0200 Subject: [PATCH 4/4] docs: update comment --- scripts/upload-documents.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/upload-documents.js b/scripts/upload-documents.js index 4beee66..706ebc8 100644 --- a/scripts/upload-documents.js +++ b/scripts/upload-documents.js @@ -2,7 +2,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; // This script uploads all PDF files from the 'data' folder to the ingestion API. -// It does a cross-platform equivalent of this bash script: +// It does a Node.js equivalent of this bash script: // ``` // for file in data/*.pdf; do // curl -X POST -F "file=@$file" /api/documents