diff --git a/package-lock.json b/package-lock.json
index 5375184..bf661d6 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1753,6 +1753,14 @@
         "@lit-labs/ssr-dom-shim": "^1.2.0"
       }
     },
+    "node_modules/@microsoft/ai-chat-protocol": {
+      "version": "1.0.0-alpha.20240418.1",
+      "resolved": "https://registry.npmjs.org/@microsoft/ai-chat-protocol/-/ai-chat-protocol-1.0.0-alpha.20240418.1.tgz",
+      "integrity": "sha512-ZuwVnk1StYUbAJT8sa15dTtA+rn9uLLGYdi2vZQhhxOcy9uwgncROW52O1CjZ9l7gd+tsmpskm+ZIIXDjinJsQ==",
+      "dependencies": {
+        "@typespec/ts-http-runtime": "^1.0.0-alpha.20240416.4"
+      }
+    },
     "node_modules/@mongodb-js/saslprep": {
       "version": "1.1.5",
       "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.5.tgz",
@@ -2375,6 +2383,19 @@
         "url": "https://opencollective.com/typescript-eslint"
       }
     },
+    "node_modules/@typespec/ts-http-runtime": {
+      "version": "1.0.0-alpha.20240429.3",
+      "resolved": "https://registry.npmjs.org/@typespec/ts-http-runtime/-/ts-http-runtime-1.0.0-alpha.20240429.3.tgz",
+      "integrity": "sha512-QDWFa9MSIGTSvvLHmmEzSYDV9byI2Jr7EQ/SkKEFQxN0Ogw4CIfUQq5ImaKLfnEXLvhMUUx7Wi1+ocymQ3Z3IQ==",
+      "dependencies": {
+        "http-proxy-agent": "^7.0.0",
+        "https-proxy-agent": "^7.0.0",
+        "tslib": "^2.6.2"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/@ungap/structured-clone": {
       "version": "1.2.0",
       "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz",
@@ -11051,6 +11072,7 @@
         "@langchain/azure-openai": "^0.0.8",
         "@langchain/community": "^0.0.53",
         "@langchain/core": "^0.1.61",
+        "@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
        "dotenv": "^16.4.5",
         "faiss-node": "^0.5.1",
         "langchain": "^0.1.36",
@@ -11066,6 +11088,7 @@
       "version": "1.0.0",
       "license": "MIT",
       "dependencies": {
+        "@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
         "lit": "^3.0.0"
       },
       "devDependencies": {
diff --git a/packages/api/api.http b/packages/api/api.http
index 6266c28..ae91597 100644
--- a/packages/api/api.http
+++ b/packages/api/api.http
@@ -21,7 +21,7 @@ Content-Type: application/pdf
 GET {{api_host}}/api/documents/support.pdf
 
 ### Chat with the bot
-POST {{api_host}}/api/chat
+POST {{api_host}}/api/chat/stream
 Content-Type: application/json
 
 {
@@ -31,5 +31,4 @@ Content-Type: application/json
       "role": "user"
     }
-  ],
-  "stream": true
+  ]
 }
diff --git a/packages/api/package.json b/packages/api/package.json
index dde7901..b553b6b 100644
--- a/packages/api/package.json
+++ b/packages/api/package.json
@@ -21,6 +21,7 @@
     "@langchain/azure-openai": "^0.0.8",
     "@langchain/community": "^0.0.53",
     "@langchain/core": "^0.1.61",
+    "@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
     "dotenv": "^16.4.5",
     "faiss-node": "^0.5.1",
     "langchain": "^0.1.36",
diff --git a/packages/api/src/functions/chat-post.ts b/packages/api/src/functions/chat-post.ts
index efec393..787704a 100644
--- a/packages/api/src/functions/chat-post.ts
+++ b/packages/api/src/functions/chat-post.ts
@@ -1,6 +1,7 @@
 import { Readable } from 'node:stream';
-import { Document } from '@langchain/core/documents';
 import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
+import { AIChatCompletionRequest, AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';
+import { Document } from '@langchain/core/documents';
 import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai';
 import { Embeddings } from '@langchain/core/embeddings';
 import { BaseChatModel } from '@langchain/core/language_models/chat_models';
@@ -15,7 +16,6 @@ import { createRetrievalChain } from 'langchain/chains/retrieval';
 import 'dotenv/config';
 import { badRequest, data, serviceUnavailable } from '../http-response';
 import { ollamaChatModel, ollamaEmbeddingsModel, faissStoreFolder } from '../constants';
-import { ChatRequest, ChatResponseChunk } from '../models';
 import { getCredentials } from '../security';
 
 const systemPrompt = `Assistant helps the Consto Real Estate company customers with questions and support requests. Be brief in your answers. Format the answer in plain text.
@@ -40,17 +40,13 @@ export async function postChat(request: HttpRequest, context: InvocationContext
   const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;
 
   try {
-    const requestBody = (await request.json()) as ChatRequest;
-    const { messages, stream } = requestBody;
+    const requestBody = (await request.json()) as AIChatCompletionRequest;
+    const { messages } = requestBody;
 
     if (!messages || messages.length === 0 || !messages.at(-1)?.content) {
       return badRequest('Invalid or missing messages in the request body');
     }
 
-    if (!stream) {
-      return badRequest('Only stream mode is supported');
-    }
-
     let embeddings: Embeddings;
     let model: BaseChatModel;
     let store: VectorStore;
@@ -119,16 +115,11 @@ function createStream(chunks: AsyncIterable<{ context: Document[]; answer: strin
     for await (const chunk of chunks) {
       if (!chunk.answer) continue;
 
-      const responseChunk: ChatResponseChunk = {
-        choices: [
-          {
-            index: 0,
-            delta: {
-              content: chunk.answer,
-              role: 'assistant',
-            },
-          },
-        ],
+      const responseChunk: AIChatCompletionDelta = {
+        delta: {
+          content: chunk.answer,
+          role: 'assistant',
+        },
       };
 
       // Format response chunks in Newline delimited JSON
@@ -146,7 +137,7 @@ function createStream(chunks: AsyncIterable<{ context: Document[]; answer: strin
 app.setup({ enableHttpStream: true });
 
 app.http('chat-post', {
-  route: 'chat',
+  route: 'chat/stream',
   methods: ['POST'],
   authLevel: 'anonymous',
   handler: postChat,
diff --git a/packages/api/src/models.ts b/packages/api/src/models.ts
deleted file mode 100644
index 83ecf96..0000000
--- a/packages/api/src/models.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-export type Message = {
-  content: string;
-  role: string;
-};
-
-export type ChatRequest = {
-  messages: Message[];
-  stream: boolean;
-};
-
-export type ChatResponseChunk = {
-  choices: Array<{
-    index: number;
-    delta: Message;
-  }>;
-};
diff --git a/packages/webapp/package.json b/packages/webapp/package.json
index 1c7730d..0f4c29d 100644
--- a/packages/webapp/package.json
+++ b/packages/webapp/package.json
@@ -14,11 +14,14 @@
   "author": "Microsoft",
   "license": "MIT",
   "dependencies": {
+    "@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
     "lit": "^3.0.0"
   },
   "devDependencies": {
     "lit-analyzer": "^2.0.1",
     "vite": "^5.0.12"
   },
-  "files": ["dist"]
+  "files": [
+    "dist"
+  ]
 }
diff --git a/packages/webapp/src/api.ts b/packages/webapp/src/api.ts
index 330a2a6..33df036 100644
--- a/packages/webapp/src/api.ts
+++ b/packages/webapp/src/api.ts
@@ -1,78 +1,33 @@
-import { type ChatResponse, type ChatRequestOptions, type ChatResponseChunk } from './models.js';
+import { AIChatMessage, AIChatCompletionDelta, AIChatProtocolClient } from '@microsoft/ai-chat-protocol';
 
 export const apiBaseUrl: string = import.meta.env.VITE_API_URL || '';
 
-export async function getCompletion(options: ChatRequestOptions) {
+export type ChatRequestOptions = {
+  messages: AIChatMessage[];
+  chunkIntervalMs: number;
+  apiUrl: string;
+};
+
+export async function* getCompletion(options: ChatRequestOptions) {
   const apiUrl = options.apiUrl || apiBaseUrl;
-  const response = await fetch(`${apiUrl}/api/chat`, {
-    method: 'POST',
-    headers: { 'Content-Type': 'application/json' },
-    body: JSON.stringify({
-      messages: options.messages,
-      stream: options.stream,
-    }),
+  const client = new AIChatProtocolClient(`${apiUrl}/api/chat`, {
+    allowInsecureConnection: true,
   });
+  const result = await client.getStreamedCompletion(options.messages);
 
-  if (response.status > 299 || !response.ok) {
-    let json: ChatResponse | undefined;
-    try {
-      json = await response.json();
-    } catch {}
-
-    const error = json?.error ?? response.statusText;
-    throw new Error(error);
-  }
+  for await (const response of result) {
+    if (!response.delta) {
+      continue;
+    }
 
-  if (options.stream) {
-    return getChunksFromResponse(response, options.chunkIntervalMs);
+    yield new Promise<AIChatCompletionDelta>((resolve) => {
+      setTimeout(() => {
+        resolve(response);
+      }, options.chunkIntervalMs);
+    });
   }
-
-  return response.json();
 }
 
 export function getCitationUrl(citation: string): string {
   return `${apiBaseUrl}/api/documents/${citation}`;
 }
-
-export class NdJsonParserStream extends TransformStream<string, JSON> {
-  private buffer = '';
-  constructor() {
-    let controller: TransformStreamDefaultController<JSON>;
-    super({
-      start(_controller) {
-        controller = _controller;
-      },
-      transform: (chunk) => {
-        const jsonChunks = chunk.split('\n').filter(Boolean);
-        for (const jsonChunk of jsonChunks) {
-          try {
-            this.buffer += jsonChunk;
-            controller.enqueue(JSON.parse(this.buffer));
-            this.buffer = '';
-          } catch {
-            // Invalid JSON, wait for next chunk
-          }
-        }
-      },
-    });
-  }
-}
-
-export async function* getChunksFromResponse<T>(response: Response, intervalMs: number): AsyncGenerator<T> {
-  const reader = response.body?.pipeThrough(new TextDecoderStream()).pipeThrough(new NdJsonParserStream()).getReader();
-  if (!reader) {
-    throw new Error('No response body or body is not readable');
-  }
-
-  let value: JSON | undefined;
-  let done: boolean;
-  // eslint-disable-next-line no-await-in-loop
-  while ((({ value, done } = await reader.read()), !done)) {
-    const chunk = value as T;
-    yield new Promise<T>((resolve) => {
-      setTimeout(() => {
-        resolve(chunk);
-      }, intervalMs);
-    });
-  }
-}
diff --git a/packages/webapp/src/components/chat.ts b/packages/webapp/src/components/chat.ts
index 5e1b9e6..750e24c 100644
--- a/packages/webapp/src/components/chat.ts
+++ b/packages/webapp/src/components/chat.ts
@@ -3,8 +3,8 @@ import { map } from 'lit/directives/map.js';
 import { repeat } from 'lit/directives/repeat.js';
 import { unsafeSVG } from 'lit/directives/unsafe-svg.js';
 import { customElement, property, state, query } from 'lit/decorators.js';
-import { type ChatRequestOptions, type ChatResponse, type ChatMessage, type ChatResponseChunk } from '../models.js';
-import { getCitationUrl, getCompletion } from '../api.js';
+import { AIChatCompletionDelta, AIChatMessage } from '@microsoft/ai-chat-protocol';
+import { type ChatRequestOptions, getCitationUrl, getCompletion } from '../api.js';
 import { type ParsedMessage, parseMessageIntoHtml } from '../message-parser.js';
 import sendSvg from '../../assets/send.svg?raw';
 import questionSvg from '../../assets/question.svg?raw';
@@ -35,7 +35,6 @@ export type ChatComponentOptions = ChatRequestOptions & {
 };
 
 export const defaultOptions: ChatComponentOptions = {
-  stream: true,
   chunkIntervalMs: 30,
   apiUrl: '',
   enablePromptSuggestions: true,
@@ -78,7 +77,7 @@ export class ChatComponent extends LitElement {
   options: ChatComponentOptions = defaultOptions;
 
   @property() question = '';
-  @property({ type: Array }) messages: ChatMessage[] = [];
+  @property({ type: Array }) messages: AIChatMessage[] = [];
   @state() protected hasError = false;
   @state() protected isLoading = false;
   @state() protected isStreaming = false;
@@ -122,26 +121,20 @@ export class ChatComponent extends LitElement {
     this.isLoading = true;
     this.scrollToLastMessage();
     try {
-      const response = await getCompletion({ ...this.options, messages: this.messages });
-      if (this.options.stream) {
-        const chunks = response as AsyncGenerator<ChatResponseChunk>;
-        const { messages } = this;
-        const message: ChatMessage = {
-          content: '',
-          role: 'assistant',
-        };
-        for await (const chunk of chunks) {
-          if (chunk.choices[0].delta.content) {
-            this.isStreaming = true;
-            message.content += chunk.choices[0].delta.content;
-            this.messages = [...messages, message];
-            this.scrollToLastMessage();
-          }
+      const response = getCompletion({ ...this.options, messages: this.messages });
+      const chunks = response as AsyncGenerator<AIChatCompletionDelta>;
+      const { messages } = this;
+      const message: AIChatMessage = {
+        content: '',
+        role: 'assistant',
+      };
+      for await (const chunk of chunks) {
+        if (chunk.delta.content) {
+          this.isStreaming = true;
+          message.content += chunk.delta.content;
+          this.messages = [...messages, message];
+          this.scrollToLastMessage();
         }
-      } else {
-        const chatResponse = response as ChatResponse;
-        this.messages = [...this.messages, chatResponse.choices[0].message];
-        this.scrollToLastMessage();
       }
 
       this.isLoading = false;
diff --git a/packages/webapp/src/index.ts b/packages/webapp/src/index.ts
index 186b4d4..dbf60cf 100644
--- a/packages/webapp/src/index.ts
+++ b/packages/webapp/src/index.ts
@@ -1,4 +1,3 @@
 export * from './api.js';
 export * from './components/chat.js';
 export * from './message-parser.js';
-export * from './models.js';
diff --git a/packages/webapp/src/message-parser.ts b/packages/webapp/src/message-parser.ts
index 2fa069d..ffedd77 100644
--- a/packages/webapp/src/message-parser.ts
+++ b/packages/webapp/src/message-parser.ts
@@ -1,16 +1,16 @@
 import { type HTMLTemplateResult, html, nothing } from 'lit';
-import { type ChatMessage, type ChatMessageContext } from './models.js';
+import { AIChatMessage } from '@microsoft/ai-chat-protocol';
 
 export type ParsedMessage = {
   html: HTMLTemplateResult;
   citations: string[];
   followupQuestions: string[];
   role: string;
-  context?: ChatMessageContext;
+  context?: Record<string, unknown>;
 };
 
 export function parseMessageIntoHtml(
-  message: ChatMessage,
+  message: AIChatMessage,
   renderCitationReference: (citation: string, index: number) => HTMLTemplateResult,
 ): ParsedMessage {
   if (message.role === 'user') {
diff --git a/packages/webapp/src/models.ts b/packages/webapp/src/models.ts
deleted file mode 100644
index 204d826..0000000
--- a/packages/webapp/src/models.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-export type Message = {
-  content: string;
-  role: string;
-};
-
-export type ChatMessageContext = Record<string, unknown>;
-
-export type ChatMessage = Message & {
-  context?: ChatMessageContext;
-};
-
-export type ChatResponse = {
-  choices: Array<{
-    index: number;
-    message: ChatMessage;
-  }>;
-  error?: string;
-};
-
-export type ChatResponseChunk = {
-  choices: Array<{
-    index: number;
-    delta: Partial<Message>;
-  }>;
-  error?: string;
-};
-
-export type ChatRequestOptions = {
-  messages: Message[];
-  stream: boolean;
-  chunkIntervalMs: number;
-  apiUrl: string;
-};
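
For reference, a minimal standalone consumer of the new streaming endpoint could look like the sketch below. This is illustrative only: the localhost URL (Azure Functions' default local port) and the sample question are placeholders, while the client construction, the getStreamedCompletion call, and the delta shape all mirror the webapp changes above.

```ts
import { AIChatProtocolClient, AIChatMessage } from '@microsoft/ai-chat-protocol';

async function main() {
  // Base endpoint only: the client targets the streamed-completion route,
  // matching the function route change from 'chat' to 'chat/stream' above.
  const client = new AIChatProtocolClient('http://localhost:7071/api/chat');
  const messages: AIChatMessage[] = [{ content: 'What services do you offer?', role: 'user' }];

  // Each chunk is an AIChatCompletionDelta; accumulate delta.content
  // the same way the webapp's chat component does.
  let answer = '';
  for await (const chunk of await client.getStreamedCompletion(messages)) {
    answer += chunk.delta?.content ?? '';
  }

  console.log(answer);
}

main().catch(console.error);
```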