feat: migrate to AI chat protocol package #61

Merged · 1 commit · May 3, 2024
23 changes: 23 additions & 0 deletions package-lock.json

Some generated files are not rendered by default.

3 changes: 1 addition & 2 deletions packages/api/api.http
@@ -21,7 +21,7 @@ Content-Type: application/pdf
GET {{api_host}}/api/documents/support.pdf

### Chat with the bot
POST {{api_host}}/api/chat
POST {{api_host}}/api/chat/stream
Content-Type: application/json

{
@@ -31,5 +31,4 @@ Content-Type: application/json
"role": "user"
}
],
"stream": true
}
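The `stream: true` flag is gone from the request body; streaming is now selected by the `/api/chat/stream` route itself, per the AI Chat Protocol. As a rough sketch of what this endpoint returns, each line of the streamed body is one serialized `AIChatCompletionDelta`. This mirrors the chunks built in `chat-post.ts` below; the exact serialization line is collapsed in this diff, so the `JSON.stringify` framing here is an assumption.

```ts
import { AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';

// Shape of one streamed chunk, as constructed in chat-post.ts in this PR.
const exampleChunk: AIChatCompletionDelta = {
  delta: {
    content: 'The refund policy is...', // partial answer text (illustrative)
    role: 'assistant',
  },
};

// Assumed NDJSON framing: one JSON object per line of the response body.
const wireLine = JSON.stringify(exampleChunk) + '\n';
```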
1 change: 1 addition & 0 deletions packages/api/package.json
@@ -21,6 +21,7 @@
"@langchain/azure-openai": "^0.0.8",
"@langchain/community": "^0.0.53",
"@langchain/core": "^0.1.61",
"@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
"dotenv": "^16.4.5",
"faiss-node": "^0.5.1",
"langchain": "^0.1.36",
29 changes: 10 additions & 19 deletions packages/api/src/functions/chat-post.ts
@@ -1,6 +1,7 @@
import { Readable } from 'node:stream';
import { Document } from '@langchain/core/documents';
import { HttpRequest, InvocationContext, HttpResponseInit, app } from '@azure/functions';
import { AIChatCompletionRequest, AIChatCompletionDelta } from '@microsoft/ai-chat-protocol';
import { Document } from '@langchain/core/documents';
import { AzureOpenAIEmbeddings, AzureChatOpenAI } from '@langchain/azure-openai';
import { Embeddings } from '@langchain/core/embeddings';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
@@ -15,7 +16,6 @@ import { createRetrievalChain } from 'langchain/chains/retrieval';
import 'dotenv/config';
import { badRequest, data, serviceUnavailable } from '../http-response';
import { ollamaChatModel, ollamaEmbeddingsModel, faissStoreFolder } from '../constants';
import { ChatRequest, ChatResponseChunk } from '../models';
import { getCredentials } from '../security';

const systemPrompt = `Assistant helps the Consto Real Estate company customers with questions and support requests. Be brief in your answers. Format the answer in plain text.
@@ -40,17 +40,13 @@ export async function postChat(request: HttpRequest, context: InvocationContext)
const azureOpenAiEndpoint = process.env.AZURE_OPENAI_API_ENDPOINT;

try {
const requestBody = (await request.json()) as ChatRequest;
const { messages, stream } = requestBody;
const requestBody = (await request.json()) as AIChatCompletionRequest;
const { messages } = requestBody;

if (!messages || messages.length === 0 || !messages.at(-1)?.content) {
return badRequest('Invalid or missing messages in the request body');
}

if (!stream) {
return badRequest('Only stream mode is supported');
}

let embeddings: Embeddings;
let model: BaseChatModel;
let store: VectorStore;
@@ -119,16 +115,11 @@ function createStream(chunks: AsyncIterable<{ context: Document[]; answer: strin
for await (const chunk of chunks) {
if (!chunk.answer) continue;

const responseChunk: ChatResponseChunk = {
choices: [
{
index: 0,
delta: {
content: chunk.answer,
role: 'assistant',
},
},
],
const responseChunk: AIChatCompletionDelta = {
delta: {
content: chunk.answer,
role: 'assistant',
},
};

// Format response chunks in Newline delimited JSON
@@ -146,7 +137,7 @@

app.setup({ enableHttpStream: true });
app.http('chat-post', {
route: 'chat',
route: 'chat/stream',
methods: ['POST'],
authLevel: 'anonymous',
handler: postChat,
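The function is now registered on `chat/stream`, which is the sub-route the protocol client targets for streamed completions when pointed at the `/api/chat` base endpoint (inferred from the matching change in `packages/webapp/src/api.ts`). A minimal client-side counterpart, with the localhost URL and question text as illustrative assumptions:

```ts
import { AIChatProtocolClient } from '@microsoft/ai-chat-protocol';

// Base endpoint; getStreamedCompletion targets the /stream sub-route registered above.
const client = new AIChatProtocolClient('http://localhost:7071/api/chat', {
  allowInsecureConnection: true,
});

const result = await client.getStreamedCompletion([
  { content: 'What is the refund policy?', role: 'user' },
]);
// `result` is an async iterable of AIChatCompletionDelta chunks.
```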
16 changes: 0 additions & 16 deletions packages/api/src/models.ts

This file was deleted.
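The hand-rolled types from this file are replaced by their counterparts in `@microsoft/ai-chat-protocol`. A rough mapping, inferred from how the types are swapped across the rest of this diff:

```ts
import {
  AIChatMessage,           // replaces ChatMessage; ChatMessageContext becomes Record<string, unknown>
  AIChatCompletionRequest, // replaces ChatRequest in chat-post.ts
  AIChatCompletionDelta,   // replaces ChatResponseChunk; the delta is no longer nested under choices[]
} from '@microsoft/ai-chat-protocol';
```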

5 changes: 4 additions & 1 deletion packages/webapp/package.json
@@ -14,11 +14,14 @@
"author": "Microsoft",
"license": "MIT",
"dependencies": {
"@microsoft/ai-chat-protocol": "^1.0.0-alpha.20240418.1",
"lit": "^3.0.0"
},
"devDependencies": {
"lit-analyzer": "^2.0.1",
"vite": "^5.0.12"
},
"files": ["dist"]
"files": [
"dist"
]
}
85 changes: 20 additions & 65 deletions packages/webapp/src/api.ts
@@ -1,78 +1,33 @@
import { type ChatResponse, type ChatRequestOptions, type ChatResponseChunk } from './models.js';
import { AIChatMessage, AIChatCompletionDelta, AIChatProtocolClient } from '@microsoft/ai-chat-protocol';

export const apiBaseUrl: string = import.meta.env.VITE_API_URL || '';

export async function getCompletion(options: ChatRequestOptions) {
export type ChatRequestOptions = {
messages: AIChatMessage[];
chunkIntervalMs: number;
apiUrl: string;
};

export async function* getCompletion(options: ChatRequestOptions) {
const apiUrl = options.apiUrl || apiBaseUrl;
const response = await fetch(`${apiUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
messages: options.messages,
stream: options.stream,
}),
const client = new AIChatProtocolClient(`${apiUrl}/api/chat`, {
allowInsecureConnection: true,
});
const result = await client.getStreamedCompletion(options.messages);

if (response.status > 299 || !response.ok) {
let json: ChatResponse | undefined;
try {
json = await response.json();
} catch {}

const error = json?.error ?? response.statusText;
throw new Error(error);
}
for await (const response of result) {
if (!response.delta) {
continue;
}

if (options.stream) {
return getChunksFromResponse<ChatResponseChunk>(response, options.chunkIntervalMs);
yield new Promise<AIChatCompletionDelta>((resolve) => {
setTimeout(() => {
resolve(response);
}, options.chunkIntervalMs);
});
}

return response.json();
}

export function getCitationUrl(citation: string): string {
return `${apiBaseUrl}/api/documents/${citation}`;
}

export class NdJsonParserStream extends TransformStream<string, JSON> {
private buffer = '';
constructor() {
let controller: TransformStreamDefaultController<JSON>;
super({
start(_controller) {
controller = _controller;
},
transform: (chunk) => {
const jsonChunks = chunk.split('\n').filter(Boolean);
for (const jsonChunk of jsonChunks) {
try {
this.buffer += jsonChunk;
controller.enqueue(JSON.parse(this.buffer));
this.buffer = '';
} catch {
// Invalid JSON, wait for next chunk
}
}
},
});
}
}

export async function* getChunksFromResponse<T>(response: Response, intervalMs: number): AsyncGenerator<T, void> {
const reader = response.body?.pipeThrough(new TextDecoderStream()).pipeThrough(new NdJsonParserStream()).getReader();
if (!reader) {
throw new Error('No response body or body is not readable');
}

let value: JSON | undefined;
let done: boolean;
// eslint-disable-next-line no-await-in-loop
while ((({ value, done } = await reader.read()), !done)) {
const chunk = value as T;
yield new Promise<T>((resolve) => {
setTimeout(() => {
resolve(chunk);
}, intervalMs);
});
}
}
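`getCompletion` is now an async generator over protocol deltas instead of a fetch call plus a hand-rolled NDJSON parser; the `chunkIntervalMs` delay is kept so the UI can pace the rendered stream. A minimal consumption sketch (the question text is illustrative; this mirrors what `chat.ts` does below):

```ts
import { getCompletion } from './api.js';

let answer = '';
for await (const chunk of getCompletion({
  apiUrl: '',          // empty string falls back to apiBaseUrl (VITE_API_URL)
  chunkIntervalMs: 30,
  messages: [{ content: 'What is the refund policy?', role: 'user' }],
})) {
  if (chunk.delta.content) {
    answer += chunk.delta.content;
  }
}
```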
39 changes: 16 additions & 23 deletions packages/webapp/src/components/chat.ts
@@ -3,8 +3,8 @@ import { map } from 'lit/directives/map.js';
import { repeat } from 'lit/directives/repeat.js';
import { unsafeSVG } from 'lit/directives/unsafe-svg.js';
import { customElement, property, state, query } from 'lit/decorators.js';
import { type ChatRequestOptions, type ChatResponse, type ChatMessage, type ChatResponseChunk } from '../models.js';
import { getCitationUrl, getCompletion } from '../api.js';
import { AIChatCompletionDelta, AIChatMessage } from '@microsoft/ai-chat-protocol';
import { type ChatRequestOptions, getCitationUrl, getCompletion } from '../api.js';
import { type ParsedMessage, parseMessageIntoHtml } from '../message-parser.js';
import sendSvg from '../../assets/send.svg?raw';
import questionSvg from '../../assets/question.svg?raw';
@@ -35,7 +35,6 @@ export type ChatComponentOptions = ChatRequestOptions & {
};

export const defaultOptions: ChatComponentOptions = {
stream: true,
chunkIntervalMs: 30,
apiUrl: '',
enablePromptSuggestions: true,
@@ -78,7 +77,7 @@ export class ChatComponent extends LitElement {
options: ChatComponentOptions = defaultOptions;

@property() question = '';
@property({ type: Array }) messages: ChatMessage[] = [];
@property({ type: Array }) messages: AIChatMessage[] = [];
@state() protected hasError = false;
@state() protected isLoading = false;
@state() protected isStreaming = false;
@@ -122,26 +121,20 @@
this.isLoading = true;
this.scrollToLastMessage();
try {
const response = await getCompletion({ ...this.options, messages: this.messages });
if (this.options.stream) {
const chunks = response as AsyncGenerator<ChatResponseChunk>;
const { messages } = this;
const message: ChatMessage = {
content: '',
role: 'assistant',
};
for await (const chunk of chunks) {
if (chunk.choices[0].delta.content) {
this.isStreaming = true;
message.content += chunk.choices[0].delta.content;
this.messages = [...messages, message];
this.scrollToLastMessage();
}
const response = getCompletion({ ...this.options, messages: this.messages });
const chunks = response as AsyncGenerator<AIChatCompletionDelta>;
const { messages } = this;
const message: AIChatMessage = {
content: '',
role: 'assistant',
};
for await (const chunk of chunks) {
if (chunk.delta.content) {
this.isStreaming = true;
message.content += chunk.delta.content;
this.messages = [...messages, message];
this.scrollToLastMessage();
}
} else {
const chatResponse = response as ChatResponse;
this.messages = [...this.messages, chatResponse.choices[0].message];
this.scrollToLastMessage();
}

this.isLoading = false;
1 change: 0 additions & 1 deletion packages/webapp/src/index.ts
@@ -1,4 +1,3 @@
export * from './api.js';
export * from './components/chat.js';
export * from './message-parser.js';
export * from './models.js';
6 changes: 3 additions & 3 deletions packages/webapp/src/message-parser.ts
@@ -1,16 +1,16 @@
import { type HTMLTemplateResult, html, nothing } from 'lit';
import { type ChatMessage, type ChatMessageContext } from './models.js';
import { AIChatMessage } from '@microsoft/ai-chat-protocol';

export type ParsedMessage = {
html: HTMLTemplateResult;
citations: string[];
followupQuestions: string[];
role: string;
context?: ChatMessageContext;
context?: Record<string, unknown>;
};

export function parseMessageIntoHtml(
message: ChatMessage,
message: AIChatMessage,
renderCitationReference: (citation: string, index: number) => HTMLTemplateResult,
): ParsedMessage {
if (message.role === 'user') {
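`parseMessageIntoHtml` now takes the protocol's `AIChatMessage` directly. A hypothetical call, with the message text and citation markup as assumptions (the citation syntax the parser extracts is not visible in this diff):

```ts
import { html } from 'lit';
import { parseMessageIntoHtml } from './message-parser.js';

const parsed = parseMessageIntoHtml(
  { role: 'assistant', content: 'See the lease terms in the support document.' },
  (citation, index) => html`<sup>${index}</sup>`,
);
// parsed.html, parsed.citations, parsed.followupQuestions and parsed.role
// are then available to the chat component for rendering.
```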