diff --git a/README.md b/README.md index ed197e0..d32e381 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,12 @@ # Serverless ChatGPT with RAG using LangChain.js [![Open project in GitHub Codespaces](https://img.shields.io/badge/Codespaces-Open-blue?style=flat-square&logo=github)](https://codespaces.new/Azure-Samples/serverless-chat-langchainjs?hide_repo_select=true&ref=main) -![Node version](https://img.shields.io/badge/Node.js->=20-grass?style=flat-square) +[![Build Status](https://img.shields.io/github/actions/workflow/status/Azure-Samples/serverless-chat-langchainjs/build-test.yaml?style=flat-square&label=Build)](https://github.com/Azure-Samples/serverless-chat-langchainjs/actions) +![Node version](https://img.shields.io/badge/Node.js->=20-3c873a?style=flat-square) +[![Ollama + Mistral](https://img.shields.io/badge/Ollama-Mistral-ff7000?style=flat-square)](https://ollama.com/library/mistral) [![TypeScript](https://img.shields.io/badge/TypeScript-blue?style=flat-square&logo=typescript&logoColor=white)](https://www.typescriptlang.org) -[![License](https://img.shields.io/badge/License-MIT-orange?style=flat-square)](LICENSE) +[![License](https://img.shields.io/badge/License-MIT-yellow?style=flat-square)](LICENSE) - :star: If you like this sample, star it on GitHub — it helps a lot! 
diff --git a/packages/api/src/functions/chat-post.ts b/packages/api/src/functions/chat-post.ts index 59c8adc..ee8b776 100644 --- a/packages/api/src/functions/chat-post.ts +++ b/packages/api/src/functions/chat-post.ts @@ -55,6 +55,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) let store: VectorStore; if (azureOpenAiEndpoint) { + // Initialize models and vector database embeddings = new AzureOpenAIEmbeddings(); model = new AzureChatOpenAI(); store = new AzureCosmosDBVectorStore(embeddings, {}); @@ -66,6 +67,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) store = await FaissStore.load(faissStoreFolder, embeddings); } + // Create the chain that combines the prompt with the documents const combineDocsChain = await createStuffDocumentsChain({ llm: model, prompt: ChatPromptTemplate.fromMessages([ @@ -74,6 +76,8 @@ export async function postChat(request: HttpRequest, context: InvocationContext) ]), documentPrompt: PromptTemplate.fromTemplate('{filename}: {page_content}\n'), }); + + // Create the chain to retrieve the documents from the database const chain = await createRetrievalChain({ retriever: store.asRetriever(), combineDocsChain, @@ -96,6 +100,7 @@ export async function postChat(request: HttpRequest, context: InvocationContext) } } +// Transform the response chunks into a JSON stream function createStream(chunks: AsyncIterable<{ context: Document[]; answer: string }>) { const buffer = new Readable({ read() {}, diff --git a/packages/api/src/functions/documents-get.ts b/packages/api/src/functions/documents-get.ts index 4f4fded..0683f4f 100644 --- a/packages/api/src/functions/documents-get.ts +++ b/packages/api/src/functions/documents-get.ts @@ -16,6 +16,7 @@ async function getDocument(request: HttpRequest, context: InvocationContext): Pr let fileData: Uint8Array; if (connectionString && containerName) { + // Retrieve the file from Azure Blob Storage context.log(`Reading blob from: 
"${containerName}/${fileName}"`); const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString); const containerClient = blobServiceClient.getContainerClient(containerName); diff --git a/packages/api/src/functions/documents-post.ts b/packages/api/src/functions/documents-post.ts index be8b851..e7787c9 100644 --- a/packages/api/src/functions/documents-post.ts +++ b/packages/api/src/functions/documents-post.ts @@ -25,18 +25,21 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon const file = parsedForm.get('file') as File; const filename = file.name; + // Extract text from the PDF const loader = new PDFLoader(file, { splitPages: false, }); const rawDocument = await loader.load(); rawDocument[0].metadata.filename = filename; + // Split the text into smaller chunks const splitter = new RecursiveCharacterTextSplitter({ chunkSize: 1500, chunkOverlap: 100, }); const documents = await splitter.splitDocuments(rawDocument); + // Generate embeddings and save in database if (azureOpenAiEndpoint) { const store = await AzureCosmosDBVectorStore.fromDocuments(documents, new AzureOpenAIEmbeddings(), {}); await store.createIndex(); @@ -50,6 +53,7 @@ export async function postDocuments(request: HttpRequest, context: InvocationCon } if (connectionString && containerName) { + // Upload the PDF file to Azure Blob Storage context.log(`Uploading file to blob storage: "${containerName}/${filename}"`); const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString); const containerClient = blobServiceClient.getContainerClient(containerName); diff --git a/scripts/upload-documents.js b/scripts/upload-documents.js index 0930a78..706ebc8 100644 --- a/scripts/upload-documents.js +++ b/scripts/upload-documents.js @@ -1,6 +1,13 @@ import fs from 'node:fs/promises'; import path from 'node:path'; +// This script uploads all PDF files from the 'data' folder to the ingestion API. 
+// It is the Node.js equivalent of this bash script: +// ``` +// for file in data/*.pdf; do +// curl -X POST -F "file=@$file" $API_URL/api/documents +// done +// ``` async function uploadDocuments(apiUrl, dataFolder) { try { const files = await fs.readdir(dataFolder);